// Copyleft Sanmayce 2012-Dec-03, SKYFALL_TXT2HTML, revision 2.

// NOTE(review): presumably the maximum word length handled by the tool; no use is visible in this part of the file - confirm.
#define MaxWrdLen 64
// ASCII-only case folding: only 'A'..'Z' / 'a'..'z' are converted, every other value is returned unchanged.
// The argument is expanded several times, so do not pass expressions with side effects (e.g. KAZE_tolower(*p++)).
#define KAZE_tolower(c) ( (((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c) )
#define KAZE_toupper(c) ( (((c) >= 'a') && ((c) <= 'z')) ? ((c) - 'a' + 'A') : (c) )

/*
DANNII MINOGUE:
Where do we go now?
I don't know
Innocence over
Fading fast
...
You're still promising perfection, perfection
With empty words
With empty words
With empty words
With empty words
And it's hard to break a habit
You're lost inside it
...
A moment of coldness
Cuts through me (cuts through me)
I've tried to remember
Why I don't leave (I don't leave)
And you're the cause of my confusion
Closing down the way I feel
How come I don't see so clearly, so clearly
...
*/

#include <stdio.h>
	//#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdint.h> // uint8_t needed

#ifndef NULL
#define NULL ((void*)0)
#endif

// File-scope timing state.  NOTE(review): presumably start/stop stamps taken with clock(); the code using them is outside this part of the file - confirm.
clock_t clocks1, clocks2;
clock_t clocks3, clocks4;
double TotalRoughSearchTime = 0;

// Match counter: HORSPOOL_hits() increments it once per match instead of returning the position.
long Railgunhits=0;
// NOTE(review): no reader or writer of the next two counters is visible in this part of the file.
unsigned long long GlobalSP = 0;
unsigned long long GlobalI = 0;
// Scratch buffers for the comma-formatting helpers; _ui64toaKAZEzerocomma() writes up to index 60, hence 61 chars.
      char llTOaDigits2[61]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)
      char llTOaDigits3[61]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)

// Alphabet size: number of entries in the Horspool bad-character tables (bm_bc[ASIZE]), one per byte value.
#define ASIZE 256
// Only referenced by the commented-out f[]/s[] declarations below, where it bounds the pattern length.
#define XSIZE 384

// Strange bug occurs sometimes?! The whole execution is skipped when three arrays are local?!
//    signed int f[XSIZE+1]; //int f[m+1]; non-dynamical, here up to XSIZE long pattern.
//    signed int s[XSIZE+1];
//    signed int occ[ASIZE];

//#ifndef MAX
//#define MAX(a,b)            (((a) > (b)) ? (a) : (b))
//#endif  /* max */
//#ifndef MIN
//#define MIN(a,b)            (((a) < (b)) ? (a) : (b))
//#endif  /* max */

//   #define MAX(a, b)   ( (a > b) ? (a) : (b) )
// Larger of two values.  The selected argument is evaluated twice, so avoid arguments with side effects.
#define MAX(a,b)  (((a) > (b)) ? (a) : (b))

/*
 * Converts a 64-bit value to text in the given radix and stores the
 * NUL-terminated result in buf.
 *
 *   val    - value to convert; when is_neg is set it holds the two's-complement
 *            bit pattern of a negative number
 *   buf    - destination; must have room for every digit plus an optional '-'
 *            and the terminator (66 chars covers radix 2 with a sign)
 *   radix  - number base, 2..36; digits above 9 are written as 'a'..'z'
 *   is_neg - non-zero to emit a leading '-' and convert the magnitude
 *
 * A radix outside 2..36 yields an empty string instead of dividing by zero
 * (radix 0) or looping forever (radix 1).
 */
void x64toaKAZE (
        unsigned long long val,
        char *buf,
        unsigned radix,
        int is_neg
        )
{
        char *p;                /* pointer to traverse string */
        char *firstdig;         /* pointer to first digit */
        char temp;              /* temp char */
        unsigned digval;        /* value of digit */

        p = buf;

        if ( radix < 2 || radix > 36 )
        {
            *p = '\0';          /* unsupported base: produce an empty string */
            return;
        }

        if ( is_neg )
        {
            *p++ = '-';         /* negative, so output '-' and negate */
            /* Negate in unsigned arithmetic: -(long long)val overflows (undefined
               behavior) when val is the bit pattern of LLONG_MIN. */
            val = 0ULL - val;
        }

        firstdig = p;           /* save pointer to first digit */

        do {
            digval = (unsigned) (val % radix);
            val /= radix;       /* get next digit */

            /* convert to ascii and store */
            if (digval > 9)
                *p++ = (char) (digval - 10 + 'a');  /* a letter */
            else
                *p++ = (char) (digval + '0');       /* a digit */
        } while (val > 0);

        /* We now have the digit of the number in the buffer, but in reverse
           order.  Thus we reverse them now. */

        *p-- = '\0';            /* terminate string; p points to last digit */

        do {
            temp = *p;
            *p = *firstdig;
            *firstdig = temp;   /* swap *p and *firstdig */
            --p;
            ++firstdig;         /* advance to next two digits */
        } while (firstdig < p); /* repeat until halfway */
}

/* Actual functions just call conversion helper with neg flag set correctly,
   and return pointer to buffer. */

/* Signed variant: a minus sign is produced only for negative values in base 10;
   in any other base the two's-complement bit pattern is printed as unsigned. */
char * _i64toaKAZE (
        long long val,
        char *buf,
        int radix
        )
{
        int negative = 0;

        if (radix == 10 && val < 0)
            negative = 1;

        x64toaKAZE((unsigned long long)val, buf, radix, negative);
        return buf;
}

/* Unsigned variant: formats val in the given radix, never emitting a sign,
   and hands back buf for call chaining. */
char * _ui64toaKAZE (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        const int never_negative = 0;

        x64toaKAZE(val, buf, radix, never_negative);
        return buf;
}

/*
 * Formats val as a fixed-width, zero-padded, comma-grouped string in place.
 *
 * The result always occupies buf[0..59] plus a terminator at buf[60]: twelve
 * groups, each a ',' followed by four digits, filled from the right.  Missing
 * high-order digits are written as '0'; if the conversion yields more than 48
 * digits only the rightmost 48 appear.  buf must therefore hold at least 61
 * chars.  Returns buf.
 */
char * _ui64toaKAZEzerocomma (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        char *src;      /* walks the converted digits from last to first */
        int dst;        /* index in buf of the next character to store */
        int emitted;    /* digits (real or padding) stored so far */

        x64toaKAZE(val, buf, radix, 0);

        /* Locate the last digit; the scan starts at buf+1 because the
           conversion always produces at least one character. */
        src = buf + 1;
        while (*src != '\0')
            src++;
        src--;

        buf[60] = 0;
        emitted = 0;
        for (dst = 59; dst >= 0; dst--)
        {
            if (src >= buf)
                buf[dst] = *src--;      /* copy a real digit */
            else
                buf[dst] = (char) ('0'); /* ran out of digits: pad */
            emitted++;

            /* every fourth digit is preceded (to its left) by a separator */
            if (emitted % 4 == 0)
                buf[--dst] = (char) (',');
        }
        return buf;
}

/*
 * Formats val with a ',' between every group of three digits, building the
 * text right-aligned inside buf so that it ends at buf[25] with the
 * terminator at buf[26] (buf needs at least 27 chars).
 *
 * Returns a pointer to the first character of the formatted text, which lies
 * inside buf and is generally not buf itself.
 */
char * _ui64toaKAZEcomma (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        char *src;          /* reads the plain digits from last to first */
        char *dst;          /* one past the slot that will be written next */
        char digit;
        int copied = 0;     /* digits moved so far */

        x64toaKAZE(val, buf, radix, 0);

        /* Find the last digit; at least one character is always present,
           so the scan may start at buf+1. */
        src = buf + 1;
        while (*src != '\0')
            src++;
        src--;

        buf[26] = 0;
        dst = buf + 26;
        while (src >= buf)
        {
            digit = *src;
            src--;
            dst--;
            *dst = digit;
            copied++;

            /* separator after each full triple, but never in front of the number */
            if (copied % 3 == 0 && src >= buf)
            {
                dst--;
                *dst = (char) (',');
            }
        }
        return dst;
}


// ### Boyer-Moore-Horspool algorithm [
/*
 * Boyer-Moore-Horspool search for the first occurrence of a pattern.
 *
 *   y, n - text and its length in bytes
 *   x, m - pattern and its length in bytes
 *
 * Returns the zero-based offset of the first match, or -1 when there is none
 * (also for m < 1 or n < m).
 */
long HORSPOOL(char *y, char *x, long n, int m)
   {
    long i;
    int a, j, bm_bc[ASIZE];
    unsigned char ch, lastch;

    /* Nothing to search for, or the pattern cannot fit: x[m-1] below would be out of bounds. */
    if (m < 1 || n < m) return(-1);

    /* Preprocessing: shift distance for every byte value.
       Bytes are taken as unsigned char; indexing with plain char would use a
       negative index for values >= 0x80 where char is signed. */
    for (a=0; a < ASIZE; a++) bm_bc[a]=m;
    for (j=0; j < m-1; j++) bm_bc[(unsigned char)x[j]]=m-j-1;

    /* Searching */
    lastch=(unsigned char)x[m-1];
    i=0;
    while (i <= n-m) {
       ch=(unsigned char)y[i+m-1];
       /* The last byte already matches, so only the first m-1 bytes need comparing. */
       if (ch == lastch && memcmp(&y[i],x,m-1) == 0) return(i);
       i+=bm_bc[ch];
    }
    return(-1);
   }

/*
 * Boyer-Moore-Horspool scan that counts matches instead of reporting one.
 *
 *   y, n - text and its length in bytes
 *   x, m - pattern and its length in bytes
 *
 * Every match found adds one to the global Railgunhits.  The return value is
 * always -1; it carries no information.
 */
long HORSPOOL_hits(char *y, char *x, long n, int m)
   {
    long i;
    int a, j, bm_bc[ASIZE];
    unsigned char ch, lastch;

    /* Nothing to search for, or the pattern cannot fit: x[m-1] below would be out of bounds. */
    if (m < 1 || n < m) return(-1);

    /* Preprocessing: shift distance for every byte value.
       Bytes are taken as unsigned char; indexing with plain char would use a
       negative index for values >= 0x80 where char is signed. */
    for (a=0; a < ASIZE; a++) bm_bc[a]=m;
    for (j=0; j < m-1; j++) bm_bc[(unsigned char)x[j]]=m-j-1;

    /* Searching */
    lastch=(unsigned char)x[m-1];
    i=0;
    while (i <= n-m) {
       ch=(unsigned char)y[i+m-1];
       /* The last byte already matches, so only the first m-1 bytes need comparing. */
       if (ch == lastch && memcmp(&y[i],x,m-1) == 0) Railgunhits++;
       i+=bm_bc[ch];
    }
    return(-1);
   }

/*
 * Boyer-Moore-Horspool variant that checks the last and the first byte of the
 * window before comparing the bytes in between (the cheap "looks like" test is
 * borrowed from Karp-Rabin so the slower inner comparison runs rarely).
 *
 *   y, n - text and its length in bytes
 *   x, m - pattern and its length in bytes (m == 1 is handled as well)
 *
 * Returns the zero-based offset of the first match, or -1 when there is none
 * (also for m < 1 or n < m).
 */
long Boyer_Moore_Horspool_Kaze(char *y, char *x, long n, int m)
   {
    long i;
    int a, j, bm_bc[ASIZE];
    unsigned char ch;
    unsigned char lastch;
    unsigned char firstch;

    unsigned long  count;
    unsigned long  countSTATIC;

    /* Nothing to search for, or the pattern cannot fit. */
    if (m < 1 || n < m) return(-1);

    /* Preprocessing: shift distance for every byte value.
       Bytes are taken as unsigned char; indexing with plain char would use a
       negative index for values >= 0x80 where char is signed. */
    for (a=0; a < ASIZE; a++) bm_bc[a]=m;
    for (j=0; j < m-1; j++) bm_bc[(unsigned char)x[j]]=m-j-1;

    /* Searching */
    lastch=(unsigned char)x[m-1];
    firstch=(unsigned char)x[0];
    /* Bytes strictly between the first and the last one; none for m <= 2
       (m-2 must not be allowed to wrap around for m == 1). */
    countSTATIC = (m >= 2) ? (unsigned long)(m-2) : 0;
    i=0;
    while (i <= n-m) {
       ch=(unsigned char)y[i+m-1];

       /* Compare both ends as unsigned bytes: a signed y[i] never equals an
          unsigned firstch >= 0x80, which used to hide such matches. */
       if (ch == lastch && (unsigned char)y[i] == firstch)
          {
          count = countSTATIC;
          while ( count && x[1+(countSTATIC-count)] == y[i+1+(countSTATIC-count)] ) {
                count--;
          }
          if ( count == 0) return(i);
          }

       i+=bm_bc[ch];
    }
    return(-1);
   }
// ### Boyer-Moore-Horspool algorithm ]


// ### Karp-Rabin algorithm [
   #define REHASH(a, b, h) ((((h) - (a)*d) << 1) + (b))
   /*
    * Karp-Rabin search with the rolling hash  h = sum(byte[k] * 2^(m-1-k)).
    *
    *   y, n - text and its length in bytes
    *   x, m - pattern and its length in bytes
    *
    * Returns the zero-based offset of the first match, or -1 when there is none
    * (also for m < 1 or n < m).
    *
    * All hash arithmetic is done in unsigned int so that it wraps modulo 2^N
    * instead of overflowing a signed int, and bytes are read as unsigned char.
    */
   long Karp_Rabin(char *y, char *x, long n, int m) {
   unsigned int d, hx, hy;
   long j;
   int i;

   /* The initial hash reads m bytes of the text, so the pattern must fit. */
   if (m < 1 || n < m) return(-1);

   /* Preprocessing */
   /* computes d = 2^(m-1) with
      the left-shift operator (wraps to 0 for long patterns, which is
      exactly the weight such a byte has in the wrapped hash) */
   for (d = 1, i = 1; i < m; ++i)
      d = (d<<1);

   for (hy = hx = 0, i = 0; i < m; ++i) {
      hx = ((hx<<1) + (unsigned char)x[i]);
      hy = ((hy<<1) + (unsigned char)y[i]);
   }

   /* Searching */
   j = 0;
   for (;;) {
      if (hx == hy && memcmp(x, y + j, m) == 0) return(j);
      /* Stop before rolling the hash past the last window: y[j + m] would be y[n]. */
      if (j >= n-m) break;
      hy = REHASH((unsigned char)y[j], (unsigned char)y[j + m], hy);
      ++j;
   }
    return(-1);
   }
// ### Karp-Rabin algorithm ]


// ### Karp-Rabin-Kaze algorithm [
/*
 * Karp-Rabin style search with a deliberately cheap "hash": the first byte of
 * the window in the high octet and the last byte in the low octet.  Only when
 * that pair agrees with the pattern is the full memcmp() performed.  (Of the
 * hash variants the author benchmarked, this one was the fastest.)
 *
 *   pbTarget,  cbTarget  - text and its length in bytes
 *   pbPattern, cbPattern - pattern and its length in bytes
 *
 * Returns a pointer to the first match inside the text, or NULL when there is
 * none (also for an empty pattern or a pattern longer than the text).
 */
char * KarpRabinKaze (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char *  pbTargetMax = pbTarget + cbTarget;
    unsigned long  ulHashPattern;
    unsigned long  ulHashTarget;

    /* An empty pattern would make the "last byte" read pbPattern[-1]. */
    if (cbPattern == 0 || cbPattern > cbTarget)
        return(NULL);

    /* Bytes are widened through unsigned char: shifting a negative char left is undefined. */
    ulHashPattern = ( (unsigned long)(unsigned char)pbPattern[0] << 8 ) + (unsigned char)pbPattern[cbPattern-1];

    /* From here on pbTarget points one past the end of the current window. */
    pbTarget = pbTarget + cbPattern;
    ulHashTarget = ( (unsigned long)(unsigned char)*(pbTarget-cbPattern) << 8 ) + (unsigned char)*(pbTarget-1);

    // Dynamically produce hash values for the string as we go
    for ( ;; )
    {
        if ( (ulHashPattern == ulHashTarget) && !memcmp(pbPattern, pbTarget-cbPattern, cbPattern) )
            return((pbTarget-cbPattern));

        /* The window just tested was the last one that fits. */
        if (pbTarget == pbTargetMax)
            return(NULL);

        /* Slide by one: new first byte is at pbTarget+1-cbPattern, new last byte at pbTarget. */
        ulHashTarget = ( (unsigned long)(unsigned char)*(pbTarget+1-cbPattern) << 8 ) + (unsigned char)*pbTarget;
        pbTarget++;
    }
}
// ### Karp-Rabin-Kaze algorithm ]


// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Patterns of 2 bytes and longer use the tuned paths below; a 1-byte pattern is served by a plain memchr() scan.
/*
 * Substring search that picks one of three strategies:
 *   - cbPattern 2..3 : dedicated loops that test the first/last byte and hop
 *                      two positions when the next window cannot start a match;
 *   - cbPattern >= 4, cbTarget < 961 : compare the first four bytes at once
 *                      and hop past positions that cannot hold the first byte;
 *   - cbPattern >= 4, cbTarget >= 961 : Boyer-Moore-Horspool with a
 *                      first/last byte pre-check.
 * The 961 threshold is the author's empirical choice.
 *
 *   pbTarget,  cbTarget  - text and its length in bytes
 *   pbPattern, cbPattern - pattern and its length in bytes
 *
 * Returns a pointer to the first match inside the text, or NULL when there is
 * none (also for an empty pattern or a pattern longer than the text).
 *
 * All byte tests are done on values of equal signedness and the four-byte
 * loads use a 32-bit type via memcpy(), so the result does not depend on
 * whether plain char is signed, on sizeof(unsigned long), or on alignment.
 */
char * Railgun_Quadruplet (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    char * pbWindow;                    /* start of the window being tested */
    uint32_t ulHashPattern;             /* first four pattern bytes */
    uint32_t ulHashTarget;              /* first four window bytes */
    unsigned long count;
    unsigned long countSTATIC;
    unsigned long AdvanceHopperGrass;   /* extra positions that may be skipped */
    unsigned long step;
    unsigned char SINGLET;              /* first pattern byte */

    unsigned long i, j; //BMH needed
    int a, bm_bc[ASIZE]; //BMH needed
    unsigned char ch; //BMH needed

    if (cbPattern == 0 || cbPattern > cbTarget)
        return(NULL);

    if ( cbPattern<4 ) {
        /* The loops below look at pbTarget-2, so a single byte needs its own path. */
        if ( cbPattern==1 )
            return((char *)memchr(pbTarget, (unsigned char)pbPattern[0], cbTarget));

        /* From here on pbTarget points one past the end of the current window. */
        pbTarget = pbTarget+cbPattern;

        if ( cbPattern==3 ) {
            for ( ;; )
            {
                if ( pbPattern[0] == *(pbTarget-3) && pbPattern[2] == *(pbTarget-1) && pbPattern[1] == *(pbTarget-2) )
                    return((pbTarget-3));
                /* If the next position does not hold the first pattern byte, skip it too. */
                step = ( pbPattern[0] != *(pbTarget-2) ) ? 2 : 1;
                if ( (unsigned long)(pbTargetMax-pbTarget) < step )
                    return(NULL);
                pbTarget = pbTarget + step;
            }
        }

        /* cbPattern == 2 */
        for ( ;; )
        {
            if ( pbPattern[0] == *(pbTarget-2) && pbPattern[1] == *(pbTarget-1) )
                return((pbTarget-2));
            /* If the next position does not hold the first pattern byte, skip it too. */
            step = ( pbPattern[0] != *(pbTarget-1) ) ? 2 : 1;
            if ( (unsigned long)(pbTargetMax-pbTarget) < step )
                return(NULL);
            pbTarget = pbTarget + step;
        }
    }

    if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
    {
        pbTarget = pbTarget+cbPattern;
        memcpy(&ulHashPattern, pbPattern, sizeof ulHashPattern);
        SINGLET = (unsigned char)pbPattern[0];

        for ( ;; )
        {
            AdvanceHopperGrass = 0;
            pbWindow = pbTarget-cbPattern;
            memcpy(&ulHashTarget, pbWindow, sizeof ulHashTarget);

            if ( ulHashPattern == ulHashTarget ) {
                /* Verify bytes 1..cbPattern-1.  While doing so, count how many
                   leading positions (starting at offset 1) do not hold the first
                   pattern byte: none of them can start a match. */
                count = cbPattern-1;
                while ( count && pbPattern[cbPattern-count] == *(pbTarget-count) ) {
                    if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != (unsigned char)*(pbTarget-count) ) AdvanceHopperGrass++;
                    count--;
                }
                if ( count == 0) return(pbWindow);
            } else {
                /* Offsets 1..3 of the window that lack the first pattern byte can be hopped over. */
                if ( SINGLET != (unsigned char)pbWindow[1] ) {
                    AdvanceHopperGrass++;
                    if ( SINGLET != (unsigned char)pbWindow[2] ) {
                        AdvanceHopperGrass++;
                        if ( SINGLET != (unsigned char)pbWindow[3] ) AdvanceHopperGrass++;
                    }
                }
            }

            AdvanceHopperGrass++;

            /* Test the remaining room first so the pointer never moves beyond the text. */
            if ( (unsigned long)(pbTargetMax-pbTarget) < AdvanceHopperGrass )
                return(NULL);
            pbTarget = pbTarget + AdvanceHopperGrass;
        }
    }

    /* Boyer-Moore-Horspool for longer texts; pbTarget still points at the start of the text here. */
    countSTATIC = cbPattern-2;
    /* Preprocessing: bytes index the table as unsigned char, never as a possibly negative char. */
    for (a=0; a < ASIZE; a++) bm_bc[a]=(int)cbPattern;
    for (j=0; j < cbPattern-1; j++) bm_bc[(unsigned char)pbPattern[j]]=(int)(cbPattern-j-1);

    /* Searching */
    i=0;
    while (i <= cbTarget-cbPattern) {
        ch=(unsigned char)pbTarget[i+cbPattern-1];
        if (ch == (unsigned char)pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0])
        {
            count = countSTATIC;
            while ( count && pbPattern[1+(countSTATIC-count)] == pbTarget[i+1+(countSTATIC-count)] ) {
                count--;
            }
            if ( count == 0) return(pbTarget+i);
        }
        i+=bm_bc[ch];
    }
    return(NULL);
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Patterns of any length from 1 byte up are handled; an empty pattern is reported as not found.
/*
 * "First-Last" search: a window is examined in full only when both its first
 * and its last byte equal those of the pattern; the window then moves on by
 * one byte.
 *
 *   pbTarget,  cbTarget  - text and its length in bytes
 *   pbPattern, cbPattern - pattern and its length in bytes
 *
 * Returns a pointer to the first match inside the text, or NULL when there is
 * none (also for an empty pattern or a pattern longer than the text).
 */
char * Railgun_old_FiLa (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char *  pbTargetMax = pbTarget + cbTarget;
    unsigned long  count;
    unsigned long  countSTATIC;

    if (cbPattern == 0 || cbPattern > cbTarget)
        return(NULL);

    /* Bytes strictly between the first and the last one; cbPattern-2 must not wrap for a 1-byte pattern. */
    countSTATIC = (cbPattern >= 2) ? cbPattern-2 : 0;

    /* From here on pbTarget points one past the end of the current window. */
    pbTarget = pbTarget+cbPattern;

    for ( ;; )
    {
        /* Direct byte comparison is equivalent to comparing the (first<<8)+last
           pair and avoids shifting negative char values. */
        if ( pbPattern[0] == *(pbTarget-cbPattern) && pbPattern[cbPattern-1] == *(pbTarget-1) ) {
         count = countSTATIC;
         while ( count && pbPattern[1+(countSTATIC-count)] == *(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        }

        /* The window just tested was the last one that fits. */
        if (pbTarget == pbTargetMax)
            return(NULL);
        pbTarget++;
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]











// Fixed version from 2012-Feb-27.
// Tuned for patterns of 2 bytes and longer; a 1-byte pattern is served by memchr(), an empty one is reported as not found.
/*
 * Substring search that compares the first two bytes of each window as one
 * 16-bit value and, on equality, checks the remaining bytes one by one.
 *
 *   pbTarget,  cbTarget  - text and its length in bytes
 *   pbPattern, cbPattern - pattern and its length in bytes
 *
 * Returns a pointer to the first match inside the text, or NULL when there is
 * none (also for an empty pattern or a pattern longer than the text).
 */
char * Railgun_Doublet (char * pbTarget, char * pbPattern, uint32_t cbTarget, uint32_t cbPattern)
{
	char * pbTargetMax = pbTarget + cbTarget;
	uint16_t ulHashPattern, ulHashTarget;
	uint32_t count, countSTATIC;

	if (cbPattern == 0 || cbPattern > cbTarget) return(NULL);

	/* A 16-bit load needs two pattern bytes, and cbPattern-2 would wrap. */
	if (cbPattern == 1) return((char *)memchr(pbTarget, (unsigned char)pbPattern[0], cbTarget));

	countSTATIC = cbPattern-2;

	/* From here on pbTarget points one past the end of the current window. */
	pbTarget = pbTarget+cbPattern;
	/* memcpy() instead of *(uint16_t *)p: no alignment or aliasing assumptions. */
	memcpy(&ulHashPattern, pbPattern, sizeof ulHashPattern);

	for ( ;; ) {
		memcpy(&ulHashTarget, pbTarget-cbPattern, sizeof ulHashTarget);
		if ( ulHashPattern == ulHashTarget ) {
			count = countSTATIC;
			while ( count && pbPattern[2+(countSTATIC-count)] == *(pbTarget-cbPattern+2+(countSTATIC-count)) ) {
				count--;
			}
			if ( count == 0 ) return((pbTarget-cbPattern));
		}
		/* The window just tested was the last one that fits. */
		if (pbTarget == pbTargetMax) return(NULL);
		pbTarget++;
	}
}
/*
; Listing generated by: Microsoft (R) C/C++ Optimizing Compiler Version 16.00.30319.01 for x64
PUBLIC	Railgun_Doublet
pdata	SEGMENT
$pdata$Railgun_Doublet DD imagerel $LN21
	DD	imagerel $LN21+25
	DD	imagerel $unwind$Railgun_Doublet
$pdata$2$Railgun_Doublet DD imagerel $LN21+25
	DD	imagerel $LN21+136
	DD	imagerel $chain$2$Railgun_Doublet
$pdata$4$Railgun_Doublet DD imagerel $LN21+136
	DD	imagerel $LN21+171
	DD	imagerel $chain$4$Railgun_Doublet
pdata	ENDS
xdata	SEGMENT
$unwind$Railgun_Doublet DD 020301H
	DD	030026003H
$chain$2$Railgun_Doublet DD 061221H
	DD	06c412H
	DD	05740aH
	DD	045405H
	DD	imagerel $LN21
	DD	imagerel $LN21+25
	DD	imagerel $unwind$Railgun_Doublet
$chain$4$Railgun_Doublet DD 060021H
	DD	06c400H
	DD	057400H
	DD	045400H
	DD	imagerel $LN21
	DD	imagerel $LN21+25
	DD	imagerel $unwind$Railgun_Doublet
; Function compile flags: /Ogtpy
xdata	ENDS
_TEXT	SEGMENT
tv169 = 24
pbTarget$ = 24
pbPattern$ = 32
cbTarget$ = 40
cbPattern$ = 48
Railgun_Doublet PROC

; 2639 : {

$LN21:
  00c30	40 53		 push	 rbx
  00c32	56		 push	 rsi

; 2640 : 	char * pbTargetMax = pbTarget + cbTarget;

  00c33	41 8b d8	 mov	 ebx, r8d
  00c36	48 8b f2	 mov	 rsi, rdx
  00c39	4c 8b d1	 mov	 r10, rcx
  00c3c	48 03 d9	 add	 rbx, rcx

; 2641 : 	register uint32_t ulHashPattern;
; 2642 : 	uint32_t ulHashTarget, count, countSTATIC;
; 2643 : 
; 2644 : 	if (cbPattern > cbTarget) return(NULL);

  00c3f	45 3b c8	 cmp	 r9d, r8d
  00c42	76 05		 jbe	 SHORT $LN8@Railgun_du
  00c44	33 c0		 xor	 eax, eax

; 2662 : }

  00c46	5e		 pop	 rsi
  00c47	5b		 pop	 rbx
  00c48	c3		 ret	 0
$LN8@Railgun_du:
  00c49	48 89 6c 24 20	 mov	 QWORD PTR [rsp+32], rbp
  00c4e	48 89 7c 24 28	 mov	 QWORD PTR [rsp+40], rdi

; 2645 : 
; 2646 : 	countSTATIC = cbPattern-2;
; 2647 : 
; 2648 : 	pbTarget = pbTarget+cbPattern;
; 2649 : 	ulHashPattern = (*(uint16_t *)(pbPattern));

  00c53	0f b7 3a	 movzx	 edi, WORD PTR [rdx]
  00c56	4c 89 64 24 30	 mov	 QWORD PTR [rsp+48], r12
  00c5b	45 8b e1	 mov	 r12d, r9d

; 2661 : 	}

  00c5e	48 8b ea	 mov	 rbp, rdx
  00c61	4d 03 d4	 add	 r10, r12
  00c64	45 8d 59 fe	 lea	 r11d, DWORD PTR [r9-2]
  00c68	4d 8b c2	 mov	 r8, r10
  00c6b	4d 2b c4	 sub	 r8, r12
  00c6e	48 f7 dd	 neg	 rbp
$LL7@Railgun_du:

; 2650 : 
; 2651 : 	for ( ;; ) {
; 2652 : 		if ( ulHashPattern == (*(uint16_t *)(pbTarget-cbPattern)) ) {

  00c71	41 0f b7 00	 movzx	 eax, WORD PTR [r8]
  00c75	3b f8		 cmp	 edi, eax
  00c77	75 43		 jne	 SHORT $LN2@Railgun_du

; 2653 : 			count = countSTATIC;

  00c79	41 8b d3	 mov	 edx, r11d

; 2654 : 			while ( count && *(char *)(pbPattern+2+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+2+(countSTATIC-count)) ) {

  00c7c	45 85 db	 test	 r11d, r11d
  00c7f	74 1f		 je	 SHORT $LN17@Railgun_du
  00c81	48 8d 4e 02	 lea	 rcx, QWORD PTR [rsi+2]
  00c85	4d 8d 0c 28	 lea	 r9, QWORD PTR [r8+rbp]
  00c89	0f 1f 80 00 00
	00 00		 npad	 7
$LL4@Railgun_du:
  00c90	41 0f b6 04 09	 movzx	 eax, BYTE PTR [r9+rcx]
  00c95	38 01		 cmp	 BYTE PTR [rcx], al
  00c97	75 1f		 jne	 SHORT $LN3@Railgun_du

; 2655 : 				count--;

  00c99	48 ff c1	 inc	 rcx
  00c9c	ff ca		 dec	 edx
  00c9e	75 f0		 jne	 SHORT $LL4@Railgun_du
$LN17@Railgun_du:
  00ca0	48 8b 7c 24 28	 mov	 rdi, QWORD PTR [rsp+40]
  00ca5	48 8b 6c 24 20	 mov	 rbp, QWORD PTR [rsp+32]

; 2656 : 			}
; 2657 : 			if ( count == 0 ) return((pbTarget-cbPattern));

  00caa	4d 2b d4	 sub	 r10, r12
  00cad	4c 8b 64 24 30	 mov	 r12, QWORD PTR [rsp+48]
  00cb2	49 8b c2	 mov	 rax, r10

; 2662 : }

  00cb5	5e		 pop	 rsi
  00cb6	5b		 pop	 rbx
  00cb7	c3		 ret	 0
$LN3@Railgun_du:

; 2656 : 			}
; 2657 : 			if ( count == 0 ) return((pbTarget-cbPattern));

  00cb8	85 d2		 test	 edx, edx
  00cba	74 e4		 je	 SHORT $LN17@Railgun_du
$LN2@Railgun_du:

; 2658 : 		}
; 2659 : 		pbTarget++;

  00cbc	49 ff c2	 inc	 r10
  00cbf	49 ff c0	 inc	 r8

; 2660 : 		if (pbTarget > pbTargetMax) return(NULL);

  00cc2	4c 3b d3	 cmp	 r10, rbx
  00cc5	76 aa		 jbe	 SHORT $LL7@Railgun_du
  00cc7	48 8b 7c 24 28	 mov	 rdi, QWORD PTR [rsp+40]
  00ccc	48 8b 6c 24 20	 mov	 rbp, QWORD PTR [rsp+32]
  00cd1	4c 8b 64 24 30	 mov	 r12, QWORD PTR [rsp+48]
  00cd6	33 c0		 xor	 eax, eax

; 2662 : }

  00cd8	5e		 pop	 rsi
  00cd9	5b		 pop	 rbx
  00cda	c3		 ret	 0
Railgun_Doublet ENDP
_TEXT	ENDS
*/
//
/*
Testbed: [File: OSHO.TXT 206,908,949 bytes; LinesEncountered: 2,459,508; Windows 7 64bit; Microsoft 2010 64bit compiler; E7500 2.93GHz dual DDR2]

Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1212509/544
strstr_Microsoft performance: 248KB/clock
StrnglenTRAVERSED: 138478024 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1212509/359
strstr_GNU_C_Library performance: 376KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1212509/321
Railgun_Doublet performance: 421KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1212509/335
Railgun_Quadruplet_8Triplet performance: 403KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1212509/348
Railgun_Mischa_8Triplet performance: 388KB/clock
StrnglenTRAVERSED: 138478024 bytes

BNDM_32_hits/BNDM_32_clocks: 1212509/505
BNDM_32 performance: 267KB/clock
StrnglenTRAVERSED: 138478024 bytes

Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 780175/616
strstr_Microsoft performance: 260KB/clock
StrnglenTRAVERSED: 164505415 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 780175/401
strstr_GNU_C_Library performance: 400KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 780175/337
Railgun_Doublet performance: 476KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 780175/366
Railgun_Quadruplet_8Triplet performance: 438KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 780175/379
Railgun_Mischa_8Triplet performance: 423KB/clock
StrnglenTRAVERSED: 164505415 bytes

BNDM_32_hits/BNDM_32_clocks: 780175/553
BNDM_32 performance: 290KB/clock
StrnglenTRAVERSED: 164505415 bytes

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/627
strstr_Microsoft performance: 318KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/354
strstr_GNU_C_Library performance: 564KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/362
Railgun_Doublet performance: 551KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/354
Railgun_Quadruplet_8Triplet performance: 564KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/359
Railgun_Mischa_8Triplet performance: 556KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/432
BNDM_32 performance: 462KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1192002/568
strstr_Microsoft performance: 233KB/clock
StrnglenTRAVERSED: 135882884 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/383
strstr_GNU_C_Library performance: 346KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1192002/337
Railgun_Doublet performance: 393KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1192002/354
Railgun_Quadruplet_8Triplet performance: 374KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1192002/363
Railgun_Mischa_8Triplet performance: 365KB/clock
StrnglenTRAVERSED: 135882884 bytes

BNDM_32_hits/BNDM_32_clocks: 1192002/519
BNDM_32 performance: 255KB/clock
StrnglenTRAVERSED: 135882884 bytes

Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 5384/640
strstr_Microsoft performance: 311KB/clock
StrnglenTRAVERSED: 204186782 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 5384/369
strstr_GNU_C_Library performance: 540KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 5384/362
Railgun_Doublet performance: 550KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 5384/485
Railgun_Quadruplet_8Triplet performance: 411KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 5384/429
Railgun_Mischa_8Triplet performance: 464KB/clock
StrnglenTRAVERSED: 204186782 bytes

BNDM_32_hits/BNDM_32_clocks: 5384/510
BNDM_32 performance: 390KB/clock
StrnglenTRAVERSED: 204186782 bytes

Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4825/771
strstr_Microsoft performance: 258KB/clock
StrnglenTRAVERSED: 204202166 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4825/504
strstr_GNU_C_Library performance: 395KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4825/373
Railgun_Doublet performance: 534KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4825/549
Railgun_Quadruplet_8Triplet performance: 363KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4825/429
Railgun_Mischa_8Triplet performance: 464KB/clock
StrnglenTRAVERSED: 204202166 bytes

BNDM_32_hits/BNDM_32_clocks: 4825/526
BNDM_32 performance: 379KB/clock
StrnglenTRAVERSED: 204202166 bytes

Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/645
strstr_Microsoft performance: 309KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/373
strstr_GNU_C_Library performance: 535KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/361
Railgun_Doublet performance: 553KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/483
Railgun_Quadruplet_8Triplet performance: 413KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/429
Railgun_Mischa_8Triplet performance: 465KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/442
BNDM_32 performance: 451KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/764
strstr_Microsoft performance: 261KB/clock
StrnglenTRAVERSED: 204449414 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/497
strstr_GNU_C_Library performance: 401KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/363
Railgun_Doublet performance: 550KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/546
Railgun_Quadruplet_8Triplet performance: 365KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/429
Railgun_Mischa_8Triplet performance: 465KB/clock
StrnglenTRAVERSED: 204449414 bytes

BNDM_32_hits/BNDM_32_clocks: 1/505
BNDM_32 performance: 395KB/clock
StrnglenTRAVERSED: 204449414 bytes

Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 2/640
strstr_Microsoft performance: 311KB/clock
StrnglenTRAVERSED: 204449363 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 2/368
strstr_GNU_C_Library performance: 542KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 2/357
Railgun_Doublet performance: 559KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 2/475
Railgun_Quadruplet_8Triplet performance: 420KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 2/429
Railgun_Mischa_8Triplet performance: 465KB/clock
StrnglenTRAVERSED: 204449363 bytes

BNDM_32_hits/BNDM_32_clocks: 2/498
BNDM_32 performance: 400KB/clock
StrnglenTRAVERSED: 204449363 bytes

Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 319/710
strstr_Microsoft performance: 281KB/clock
StrnglenTRAVERSED: 204432134 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 319/438
strstr_GNU_C_Library performance: 455KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 319/358
Railgun_Doublet performance: 557KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 319/506
Railgun_Quadruplet_8Triplet performance: 394KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 319/429
Railgun_Mischa_8Triplet performance: 465KB/clock
StrnglenTRAVERSED: 204432134 bytes

BNDM_32_hits/BNDM_32_clocks: 319/461
BNDM_32 performance: 433KB/clock
StrnglenTRAVERSED: 204432134 bytes

Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4/665
strstr_Microsoft performance: 300KB/clock
StrnglenTRAVERSED: 204449185 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4/391
strstr_GNU_C_Library performance: 510KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4/361
Railgun_Doublet performance: 553KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4/482
Railgun_Quadruplet_8Triplet performance: 414KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4/430
Railgun_Mischa_8Triplet performance: 464KB/clock
StrnglenTRAVERSED: 204449185 bytes

BNDM_32_hits/BNDM_32_clocks: 4/476
BNDM_32 performance: 419KB/clock
StrnglenTRAVERSED: 204449185 bytes

Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/695
strstr_Microsoft performance: 287KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/426
strstr_GNU_C_Library performance: 468KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/355
Railgun_Doublet performance: 562KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/500
Railgun_Quadruplet_8Triplet performance: 399KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/430
Railgun_Mischa_8Triplet performance: 464KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/475
BNDM_32 performance: 420KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 141/640
strstr_Microsoft performance: 311KB/clock
StrnglenTRAVERSED: 204441500 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 141/369
strstr_GNU_C_Library performance: 541KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 141/351
Railgun_Doublet performance: 568KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 141/460
Railgun_Quadruplet_8Triplet performance: 434KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 141/429
Railgun_Mischa_8Triplet performance: 465KB/clock
StrnglenTRAVERSED: 204441500 bytes

BNDM_32_hits/BNDM_32_clocks: 141/457
BNDM_32 performance: 436KB/clock
StrnglenTRAVERSED: 204441500 bytes

Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/695
strstr_Microsoft performance: 287KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/425
strstr_GNU_C_Library performance: 469KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/348
Railgun_Doublet performance: 573KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/486
Railgun_Quadruplet_8Triplet performance: 410KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/428
Railgun_Mischa_8Triplet performance: 466KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/446
BNDM_32 performance: 447KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/630
strstr_Microsoft performance: 316KB/clock
StrnglenTRAVERSED: 204449346 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/358
strstr_GNU_C_Library performance: 557KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/340
Railgun_Doublet performance: 587KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/433
Railgun_Quadruplet_8Triplet performance: 461KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/428
Railgun_Mischa_8Triplet performance: 466KB/clock
StrnglenTRAVERSED: 204449346 bytes

BNDM_32_hits/BNDM_32_clocks: 1/458
BNDM_32 performance: 435KB/clock
StrnglenTRAVERSED: 204449346 bytes

Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 514/686
strstr_Microsoft performance: 291KB/clock
StrnglenTRAVERSED: 204422699 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 514/414
strstr_GNU_C_Library performance: 482KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 514/336
Railgun_Doublet performance: 594KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 514/456
Railgun_Quadruplet_8Triplet performance: 437KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 514/428
Railgun_Mischa_8Triplet performance: 466KB/clock
StrnglenTRAVERSED: 204422699 bytes

BNDM_32_hits/BNDM_32_clocks: 514/452
BNDM_32 performance: 441KB/clock
StrnglenTRAVERSED: 204422699 bytes

D:\_KAZE_strstr_SHORT-SHOWDOWN_7Trident2_7Hasherezade_7Gulliver2>
*/
//
/*
Testbed: [File: OSHO.TXT 206,908,949 bytes; LinesEncountered: 2,459,508; Windows 7 64bit; Intel 2011 64bit compiler; E7500 2.93GHz dual DDR2]

Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1212509/567
strstr_Microsoft performance: 238KB/clock
StrnglenTRAVERSED: 138478024 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1212509/456
strstr_GNU_C_Library performance: 296KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1212509/388
Railgun_Doublet performance: 348KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1212509/417
Railgun_Quadruplet_8Triplet performance: 324KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1212509/406
Railgun_Mischa_8Triplet performance: 333KB/clock
StrnglenTRAVERSED: 138478024 bytes

BNDM_32_hits/BNDM_32_clocks: 1212509/559
BNDM_32 performance: 241KB/clock
StrnglenTRAVERSED: 138478024 bytes

Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 780175/628
strstr_Microsoft performance: 255KB/clock
StrnglenTRAVERSED: 164505415 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 780175/502
strstr_GNU_C_Library performance: 320KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 780175/404
Railgun_Doublet performance: 397KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 780175/451
Railgun_Quadruplet_8Triplet performance: 356KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 780175/439
Railgun_Mischa_8Triplet performance: 365KB/clock
StrnglenTRAVERSED: 164505415 bytes

BNDM_32_hits/BNDM_32_clocks: 780175/611
BNDM_32 performance: 262KB/clock
StrnglenTRAVERSED: 164505415 bytes

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/618
strstr_Microsoft performance: 323KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/474
strstr_GNU_C_Library performance: 421KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/430
Railgun_Doublet performance: 464KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/436
Railgun_Quadruplet_8Triplet performance: 457KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/511
Railgun_Mischa_8Triplet performance: 390KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/488
BNDM_32 performance: 409KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1192002/584
strstr_Microsoft performance: 227KB/clock
StrnglenTRAVERSED: 135882884 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/480
strstr_GNU_C_Library performance: 276KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1192002/404
Railgun_Doublet performance: 328KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1192002/427
Railgun_Quadruplet_8Triplet performance: 310KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1192002/454
Railgun_Mischa_8Triplet performance: 292KB/clock
StrnglenTRAVERSED: 135882884 bytes

BNDM_32_hits/BNDM_32_clocks: 1192002/567
BNDM_32 performance: 234KB/clock
StrnglenTRAVERSED: 135882884 bytes

Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 5384/633
strstr_Microsoft performance: 315KB/clock
StrnglenTRAVERSED: 204186782 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 5384/478
strstr_GNU_C_Library performance: 417KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 5384/430
Railgun_Doublet performance: 463KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 5384/558
Railgun_Quadruplet_8Triplet performance: 357KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 5384/483
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204186782 bytes

BNDM_32_hits/BNDM_32_clocks: 5384/565
BNDM_32 performance: 352KB/clock
StrnglenTRAVERSED: 204186782 bytes

Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4825/773
strstr_Microsoft performance: 257KB/clock
StrnglenTRAVERSED: 204202166 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4825/617
strstr_GNU_C_Library performance: 323KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4825/441
Railgun_Doublet performance: 452KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4825/619
Railgun_Quadruplet_8Triplet performance: 322KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4825/483
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204202166 bytes

BNDM_32_hits/BNDM_32_clocks: 4825/580
BNDM_32 performance: 343KB/clock
StrnglenTRAVERSED: 204202166 bytes

Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/635
strstr_Microsoft performance: 314KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/481
strstr_GNU_C_Library performance: 415KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/429
Railgun_Doublet performance: 465KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/554
Railgun_Quadruplet_8Triplet performance: 360KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/484
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/496
BNDM_32 performance: 402KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/767
strstr_Microsoft performance: 260KB/clock
StrnglenTRAVERSED: 204449414 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/611
strstr_GNU_C_Library performance: 326KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/431
Railgun_Doublet performance: 463KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/616
Railgun_Quadruplet_8Triplet performance: 324KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/484
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204449414 bytes

BNDM_32_hits/BNDM_32_clocks: 1/556
BNDM_32 performance: 359KB/clock
StrnglenTRAVERSED: 204449414 bytes

Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 2/634
strstr_Microsoft performance: 314KB/clock
StrnglenTRAVERSED: 204449363 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 2/480
strstr_GNU_C_Library performance: 415KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 2/425
Railgun_Doublet performance: 469KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 2/547
Railgun_Quadruplet_8Triplet performance: 365KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 2/484
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204449363 bytes

BNDM_32_hits/BNDM_32_clocks: 2/544
BNDM_32 performance: 367KB/clock
StrnglenTRAVERSED: 204449363 bytes

Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 319/706
strstr_Microsoft performance: 282KB/clock
StrnglenTRAVERSED: 204432134 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 319/543
strstr_GNU_C_Library performance: 367KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 319/426
Railgun_Doublet performance: 468KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 319/577
Railgun_Quadruplet_8Triplet performance: 345KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 319/483
Railgun_Mischa_8Triplet performance: 413KB/clock
StrnglenTRAVERSED: 204432134 bytes

BNDM_32_hits/BNDM_32_clocks: 319/509
BNDM_32 performance: 392KB/clock
StrnglenTRAVERSED: 204432134 bytes

Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4/656
strstr_Microsoft performance: 304KB/clock
StrnglenTRAVERSED: 204449185 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4/492
strstr_GNU_C_Library performance: 405KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4/430
Railgun_Doublet performance: 464KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4/553
Railgun_Quadruplet_8Triplet performance: 361KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4/484
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204449185 bytes

BNDM_32_hits/BNDM_32_clocks: 4/521
BNDM_32 performance: 383KB/clock
StrnglenTRAVERSED: 204449185 bytes

Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/691
strstr_Microsoft performance: 288KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/528
strstr_GNU_C_Library performance: 378KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/424
Railgun_Doublet performance: 470KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/571
Railgun_Quadruplet_8Triplet performance: 349KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/484
Railgun_Mischa_8Triplet performance: 412KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/519
BNDM_32 performance: 384KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 141/632
strstr_Microsoft performance: 315KB/clock
StrnglenTRAVERSED: 204441500 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 141/480
strstr_GNU_C_Library performance: 415KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 141/418
Railgun_Doublet performance: 477KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 141/532
Railgun_Quadruplet_8Triplet performance: 375KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 141/483
Railgun_Mischa_8Triplet performance: 413KB/clock
StrnglenTRAVERSED: 204441500 bytes

BNDM_32_hits/BNDM_32_clocks: 141/501
BNDM_32 performance: 398KB/clock
StrnglenTRAVERSED: 204441500 bytes

Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/689
strstr_Microsoft performance: 289KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/527
strstr_GNU_C_Library performance: 378KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/416
Railgun_Doublet performance: 479KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/556
Railgun_Quadruplet_8Triplet performance: 359KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/482
Railgun_Mischa_8Triplet performance: 414KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/490
BNDM_32 performance: 407KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/623
strstr_Microsoft performance: 320KB/clock
StrnglenTRAVERSED: 204449346 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/477
strstr_GNU_C_Library performance: 418KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/407
Railgun_Doublet performance: 490KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/505
Railgun_Quadruplet_8Triplet performance: 395KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/483
Railgun_Mischa_8Triplet performance: 413KB/clock
StrnglenTRAVERSED: 204449346 bytes

BNDM_32_hits/BNDM_32_clocks: 1/498
BNDM_32 performance: 400KB/clock
StrnglenTRAVERSED: 204449346 bytes

Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 514/679
strstr_Microsoft performance: 294KB/clock
StrnglenTRAVERSED: 204422699 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 514/514
strstr_GNU_C_Library performance: 388KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 514/404
Railgun_Doublet performance: 494KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 514/526
Railgun_Quadruplet_8Triplet performance: 379KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 514/482
Railgun_Mischa_8Triplet performance: 414KB/clock
StrnglenTRAVERSED: 204422699 bytes

BNDM_32_hits/BNDM_32_clocks: 514/492
BNDM_32 performance: 405KB/clock
StrnglenTRAVERSED: 204422699 bytes

D:\_KAZE_strstr_SHORT-SHOWDOWN_7Trident2_7Hasherezade_7Gulliver2>
*/
//
/*
; Listing generated by: Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 for 80x86
PUBLIC	_Railgun_Doublet
; Function compile flags: /Ogtpy
_TEXT	SEGMENT
_ulHashPattern$ = 8					; size = 4
_pbTarget$ = 8						; size = 4
_pbPattern$ = 12					; size = 4
_pbTargetMax$ = 16					; size = 4
_cbTarget$ = 16						; size = 4
_cbPattern$ = 20					; size = 4
_Railgun_Doublet PROC

; 2640 : 	char * pbTargetMax = pbTarget + cbTarget;

  009d0	8b 44 24 0c	 mov	 eax, DWORD PTR _cbTarget$[esp-4]
  009d4	53		 push	 ebx

; 2641 : 	register uint32_t ulHashPattern;
; 2642 : 	uint32_t ulHashTarget, count, countSTATIC;
; 2643 : 
; 2644 : 	if (cbPattern > cbTarget) return(NULL);

  009d5	8b 5c 24 14	 mov	 ebx, DWORD PTR _cbPattern$[esp]
  009d9	55		 push	 ebp
  009da	8b 6c 24 0c	 mov	 ebp, DWORD PTR _pbTarget$[esp+4]
  009de	8d 0c 28	 lea	 ecx, DWORD PTR [eax+ebp]
  009e1	89 4c 24 14	 mov	 DWORD PTR _pbTargetMax$[esp+4], ecx
  009e5	3b d8		 cmp	 ebx, eax
  009e7	76 05		 jbe	 SHORT $LN8@Railgun_du
  009e9	5d		 pop	 ebp
  009ea	33 c0		 xor	 eax, eax
  009ec	5b		 pop	 ebx

; 2661 : 	}
; 2662 : }

  009ed	c3		 ret	 0
$LN8@Railgun_du:

; 2645 : 
; 2646 : 	countSTATIC = cbPattern-2;
; 2647 : 
; 2648 : 	pbTarget = pbTarget+cbPattern;
; 2649 : 	ulHashPattern = (*(uint16_t *)(pbPattern));

  009ee	8b 54 24 10	 mov	 edx, DWORD PTR _pbPattern$[esp+4]
  009f2	0f b7 02	 movzx	 eax, WORD PTR [edx]
  009f5	57		 push	 edi
  009f6	03 eb		 add	 ebp, ebx
  009f8	8b fd		 mov	 edi, ebp
  009fa	89 44 24 10	 mov	 DWORD PTR _ulHashPattern$[esp+8], eax
  009fe	2b fb		 sub	 edi, ebx
  00a00	56		 push	 esi
$LL7@Railgun_du:

; 2650 : 
; 2651 : 	for ( ;; ) {
; 2652 : 		if ( ulHashPattern == (*(uint16_t *)(pbTarget-cbPattern)) ) {

  00a01	0f b7 0f	 movzx	 ecx, WORD PTR [edi]
  00a04	39 4c 24 14	 cmp	 DWORD PTR _ulHashPattern$[esp+12], ecx
  00a08	75 2e		 jne	 SHORT $LN2@Railgun_du

; 2653 : 			count = countSTATIC;

  00a0a	8d 4b fe	 lea	 ecx, DWORD PTR [ebx-2]

; 2654 : 			while ( count && *(char *)(pbPattern+2+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+2+(countSTATIC-count)) ) {

  00a0d	85 c9		 test	 ecx, ecx
  00a0f	74 1a		 je	 SHORT $LN17@Railgun_du

; 2653 : 			count = countSTATIC;

  00a11	8b 74 24 18	 mov	 esi, DWORD PTR _pbPattern$[esp+12]
  00a15	83 c6 02	 add	 esi, 2
  00a18	8d 57 02	 lea	 edx, DWORD PTR [edi+2]
  00a1b	eb 03 8d 49 00	 npad	 5
$LL4@Railgun_du:

; 2654 : 			while ( count && *(char *)(pbPattern+2+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+2+(countSTATIC-count)) ) {

  00a20	8a 06		 mov	 al, BYTE PTR [esi]
  00a22	3a 02		 cmp	 al, BYTE PTR [edx]
  00a24	75 0e		 jne	 SHORT $LN3@Railgun_du

; 2655 : 				count--;

  00a26	42		 inc	 edx
  00a27	46		 inc	 esi
  00a28	49		 dec	 ecx
  00a29	75 f5		 jne	 SHORT $LL4@Railgun_du
$LN17@Railgun_du:
  00a2b	5e		 pop	 esi
  00a2c	5f		 pop	 edi

; 2656 : 			}
; 2657 : 			if ( count == 0 ) return((pbTarget-cbPattern));

  00a2d	8b c5		 mov	 eax, ebp
  00a2f	5d		 pop	 ebp
  00a30	2b c3		 sub	 eax, ebx
  00a32	5b		 pop	 ebx

; 2661 : 	}
; 2662 : }

  00a33	c3		 ret	 0
$LN3@Railgun_du:

; 2656 : 			}
; 2657 : 			if ( count == 0 ) return((pbTarget-cbPattern));

  00a34	85 c9		 test	 ecx, ecx
  00a36	74 f3		 je	 SHORT $LN17@Railgun_du
$LN2@Railgun_du:

; 2658 : 		}
; 2659 : 		pbTarget++;

  00a38	45		 inc	 ebp
  00a39	47		 inc	 edi

; 2660 : 		if (pbTarget > pbTargetMax) return(NULL);

  00a3a	3b 6c 24 1c	 cmp	 ebp, DWORD PTR _pbTargetMax$[esp+12]
  00a3e	76 c1		 jbe	 SHORT $LL7@Railgun_du
  00a40	5e		 pop	 esi
  00a41	5f		 pop	 edi
  00a42	5d		 pop	 ebp
  00a43	33 c0		 xor	 eax, eax
  00a45	5b		 pop	 ebx

; 2661 : 	}
; 2662 : }

  00a46	c3		 ret	 0
_Railgun_Doublet ENDP
_TEXT	ENDS
*/
//
/*
Testbed: [File: OSHO.TXT 206,908,949 bytes; LinesEncountered: 2,459,508; Windows XP 32bit; Microsoft 2010 32bit compiler; T7500 2200MHz dual DDR2]

Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1212509/684
strstr_Microsoft performance: 197KB/clock
StrnglenTRAVERSED: 138478024 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1212509/492
strstr_GNU_C_Library performance: 274KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1212509/493
Railgun_Doublet performance: 274KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1212509/442
Railgun_Quadruplet_8Triplet performance: 305KB/clock
StrnglenTRAVERSED: 138478024 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1212509/435
Railgun_Mischa_8Triplet performance: 310KB/clock
StrnglenTRAVERSED: 138478024 bytes

BNDM_32_hits/BNDM_32_clocks: 1212509/625
BNDM_32 performance: 216KB/clock
StrnglenTRAVERSED: 138478024 bytes

Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 780175/765
strstr_Microsoft performance: 209KB/clock
StrnglenTRAVERSED: 164505415 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 780175/544
strstr_GNU_C_Library performance: 295KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 780175/526
Railgun_Doublet performance: 305KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 780175/482
Railgun_Quadruplet_8Triplet performance: 333KB/clock
StrnglenTRAVERSED: 164505415 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 780175/471
Railgun_Mischa_8Triplet performance: 341KB/clock
StrnglenTRAVERSED: 164505415 bytes

BNDM_32_hits/BNDM_32_clocks: 780175/688
BNDM_32 performance: 233KB/clock
StrnglenTRAVERSED: 164505415 bytes

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/744
strstr_Microsoft performance: 268KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/490
strstr_GNU_C_Library performance: 407KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/575
Railgun_Doublet performance: 347KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/453
Railgun_Quadruplet_8Triplet performance: 440KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/445
Railgun_Mischa_8Triplet performance: 448KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/537
BNDM_32 performance: 371KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1192002/704
strstr_Microsoft performance: 188KB/clock
StrnglenTRAVERSED: 135882884 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/519
strstr_GNU_C_Library performance: 255KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1192002/545
Railgun_Doublet performance: 243KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1192002/467
Railgun_Quadruplet_8Triplet performance: 284KB/clock
StrnglenTRAVERSED: 135882884 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1192002/457
Railgun_Mischa_8Triplet performance: 290KB/clock
StrnglenTRAVERSED: 135882884 bytes

BNDM_32_hits/BNDM_32_clocks: 1192002/645
BNDM_32 performance: 205KB/clock
StrnglenTRAVERSED: 135882884 bytes

Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 5384/763
strstr_Microsoft performance: 261KB/clock
StrnglenTRAVERSED: 204186782 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 5384/507
strstr_GNU_C_Library performance: 393KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 5384/574
Railgun_Doublet performance: 347KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 5384/495
Railgun_Quadruplet_8Triplet performance: 402KB/clock
StrnglenTRAVERSED: 204186782 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 5384/580
Railgun_Mischa_8Triplet performance: 343KB/clock
StrnglenTRAVERSED: 204186782 bytes

BNDM_32_hits/BNDM_32_clocks: 5384/635
BNDM_32 performance: 314KB/clock
StrnglenTRAVERSED: 204186782 bytes

Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4825/961
strstr_Microsoft performance: 207KB/clock
StrnglenTRAVERSED: 204202166 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4825/673
strstr_GNU_C_Library performance: 296KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4825/586
Railgun_Doublet performance: 340KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4825/654
Railgun_Quadruplet_8Triplet performance: 304KB/clock
StrnglenTRAVERSED: 204202166 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4825/580
Railgun_Mischa_8Triplet performance: 343KB/clock
StrnglenTRAVERSED: 204202166 bytes

BNDM_32_hits/BNDM_32_clocks: 4825/655
BNDM_32 performance: 304KB/clock
StrnglenTRAVERSED: 204202166 bytes

Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/769
strstr_Microsoft performance: 259KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/512
strstr_GNU_C_Library performance: 389KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/571
Railgun_Doublet performance: 349KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/497
Railgun_Quadruplet_8Triplet performance: 401KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/547
BNDM_32 performance: 365KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/953
strstr_Microsoft performance: 209KB/clock
StrnglenTRAVERSED: 204449414 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/664
strstr_GNU_C_Library performance: 300KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/574
Railgun_Doublet performance: 347KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/653
Railgun_Quadruplet_8Triplet performance: 305KB/clock
StrnglenTRAVERSED: 204449414 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204449414 bytes

BNDM_32_hits/BNDM_32_clocks: 1/626
BNDM_32 performance: 318KB/clock
StrnglenTRAVERSED: 204449414 bytes

Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 2/763
strstr_Microsoft performance: 261KB/clock
StrnglenTRAVERSED: 204449363 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 2/506
strstr_GNU_C_Library performance: 394KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 2/565
Railgun_Doublet performance: 353KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 2/488
Railgun_Quadruplet_8Triplet performance: 409KB/clock
StrnglenTRAVERSED: 204449363 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 2/581
Railgun_Mischa_8Triplet performance: 343KB/clock
StrnglenTRAVERSED: 204449363 bytes

BNDM_32_hits/BNDM_32_clocks: 2/617
BNDM_32 performance: 323KB/clock
StrnglenTRAVERSED: 204449363 bytes

Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 319/871
strstr_Microsoft performance: 229KB/clock
StrnglenTRAVERSED: 204432134 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 319/592
strstr_GNU_C_Library performance: 337KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 319/565
Railgun_Doublet performance: 353KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 319/569
Railgun_Quadruplet_8Triplet performance: 350KB/clock
StrnglenTRAVERSED: 204432134 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 319/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204432134 bytes

BNDM_32_hits/BNDM_32_clocks: 319/569
BNDM_32 performance: 350KB/clock
StrnglenTRAVERSED: 204432134 bytes

Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 4/799
strstr_Microsoft performance: 249KB/clock
StrnglenTRAVERSED: 204449185 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4/533
strstr_GNU_C_Library performance: 374KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 4/568
Railgun_Doublet performance: 351KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 4/510
Railgun_Quadruplet_8Triplet performance: 391KB/clock
StrnglenTRAVERSED: 204449185 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 4/581
Railgun_Mischa_8Triplet performance: 343KB/clock
StrnglenTRAVERSED: 204449185 bytes

BNDM_32_hits/BNDM_32_clocks: 4/587
BNDM_32 performance: 340KB/clock
StrnglenTRAVERSED: 204449185 bytes

Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/848
strstr_Microsoft performance: 235KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/578
strstr_GNU_C_Library performance: 345KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/562
Railgun_Doublet performance: 355KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/556
Railgun_Quadruplet_8Triplet performance: 359KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/585
BNDM_32 performance: 341KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 141/763
strstr_Microsoft performance: 261KB/clock
StrnglenTRAVERSED: 204441500 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 141/507
strstr_GNU_C_Library performance: 393KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 141/549
Railgun_Doublet performance: 363KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 141/478
Railgun_Quadruplet_8Triplet performance: 417KB/clock
StrnglenTRAVERSED: 204441500 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 141/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204441500 bytes

BNDM_32_hits/BNDM_32_clocks: 141/560
BNDM_32 performance: 356KB/clock
StrnglenTRAVERSED: 204441500 bytes

Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/846
strstr_Microsoft performance: 236KB/clock
StrnglenTRAVERSED: 204449441 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/576
strstr_GNU_C_Library performance: 346KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 0/547
Railgun_Doublet performance: 365KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/543
Railgun_Quadruplet_8Triplet performance: 367KB/clock
StrnglenTRAVERSED: 204449441 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 0/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204449441 bytes

BNDM_32_hits/BNDM_32_clocks: 0/546
BNDM_32 performance: 365KB/clock
StrnglenTRAVERSED: 204449441 bytes

Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/751
strstr_Microsoft performance: 265KB/clock
StrnglenTRAVERSED: 204449346 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/495
strstr_GNU_C_Library performance: 403KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 1/528
Railgun_Doublet performance: 378KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1/450
Railgun_Quadruplet_8Triplet performance: 443KB/clock
StrnglenTRAVERSED: 204449346 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 1/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204449346 bytes

BNDM_32_hits/BNDM_32_clocks: 1/553
BNDM_32 performance: 361KB/clock
StrnglenTRAVERSED: 204449346 bytes

Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft_hits/strstr_Microsoft_clocks: 514/834
strstr_Microsoft performance: 239KB/clock
StrnglenTRAVERSED: 204422699 bytes

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 514/563
strstr_GNU_C_Library performance: 354KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Doublet_hits/Railgun_Doublet_clocks: 514/525
Railgun_Doublet performance: 380KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 514/509
Railgun_Quadruplet_8Triplet performance: 392KB/clock
StrnglenTRAVERSED: 204422699 bytes

Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: 514/580
Railgun_Mischa_8Triplet performance: 344KB/clock
StrnglenTRAVERSED: 204422699 bytes

BNDM_32_hits/BNDM_32_clocks: 514/546
BNDM_32 performance: 365KB/clock
StrnglenTRAVERSED: 204422699 bytes

D:\_KAZE_strstr_SHORT-SHOWDOWN_7Trident2_7Hasherezade_7Gulliver2>
*/
//
// Notes on 80x86 and x64, 2012-Feb-25:
// Three compilers were used (first two on Windows 7 64bit, the third on Windows XP 32bit):
// Intel(R) C++ 64 Compiler XE for applications running on Intel(R) 64, Version 12.1.1.258 Build 20111011
// Microsoft (R) C/C++ Optimizing Compiler Version 16.00.30319.01 for x64
// Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 for 80x86
// I have been using x64 for more than 12 hours and the picture quickly became clear: code written for 32bit must be replaced with a dedicated 64bit counterpart; relying on the former alone is a gamble.
// For example Railgun_Doublet dominates when compiled as 64bit (both with Intel XE 2011 12.1 and Microsoft 2010 16.00.30319.01 for x64), but for 32bit it is inferior compared to Railgun_Quadruplet_8Triplet.
// Summary for strstr-like (i.e. memmem for short strings/patterns) usage:
// - for 32bit use Railgun_Quadruplet_8Triplet
// - for 64bit use Railgun_Doublet
// My interest has shifted from strstr toward memmem: Railgun_Doublet will fill the gap (short strings/patterns cases) in Railgun_r8_Mimino_x64, whereas 'Trident2'+'Hasherezade' will deal with the memmem part; 'Hasherezade' will surely need to be tuned for 64bit.
/*
A self-explanatory smackdown:

File: strstr_SHORT-SHOWDOWN_Microsoft_v16_Ox_64bit.exe
Testbed: Windows 7 64bit; Microsoft 2010 64bit compiler; E7500 2.93GHz dual DDR2
Target(as-one-line): OSHO.TXT 206908949 bytes

Pattern: fastest fox
Railgun_Quadruplet_7sun:         2000KB/clock / 0775%, 26672940 iterations
Railgun_Quadruplet_7Trident:     2149KB/clock / 0993%, 20834332 iterations BNDM screams here along with Horspool&Sunday
BNDM_32:                         1507KB/clock / 1038%, 19926393 iterations
Railgun_Quadruplet_7Gulliver:    2104KB/clock / 0977%, 21166516 iterations
Railgun_Quadruplet_7Hasherezade: 1961KB/clock / 0980%, 21112673 iterations

Pattern: fastest fox with biggest strides
Railgun_Quadruplet_7sun:         3061KB/clock / 1584%, 13060463 iterations
Railgun_Quadruplet_7Trident:     3424KB/clock / 3058%, 06765272 iterations
BNDM_32:                         3483KB/clock / 3113%, 06644708 iterations
Railgun_Quadruplet_7Gulliver:    3608KB/clock / 2924%, 07074287 iterations
Railgun_Quadruplet_7Hasherezade: 3367KB/clock / 3041%, 06801754 iterations Needs 64bit hand optimization!

File: strstr_SHORT-SHOWDOWN_Intel_O3_64bit.exe
Testbed: Windows 7 64bit; Intel C++ 64 Compiler XE 12.1; E7500 2.93GHz dual DDR2
Target(as-one-line): OSHO.TXT 206908949 bytes

Pattern: fastest fox
Railgun_Quadruplet_7sun:         1942KB/clock / 0775%, 26672940 iterations
Railgun_Quadruplet_7Trident:     2245KB/clock / 0993%, 20834332 iterations BNDM screams here along with Horspool&Sunday
BNDM_32:                         1507KB/clock / 1038%, 19926393 iterations
Railgun_Quadruplet_7Gulliver:    1870KB/clock / 0977%, 21166516 iterations
Railgun_Quadruplet_7Hasherezade: 1697KB/clock / 0980%, 21112673 iterations

Pattern: fastest fox with biggest strides
Railgun_Quadruplet_7sun:         3061KB/clock / 1584%, 13060463 iterations
Railgun_Quadruplet_7Trident:     3424KB/clock / 3058%, 06765272 iterations
BNDM_32:                         3544KB/clock / 3113%, 06644708 iterations
Railgun_Quadruplet_7Gulliver:    3608KB/clock / 2924%, 07074287 iterations
Railgun_Quadruplet_7Hasherezade: 3207KB/clock / 3041%, 06801754 iterations Needs 64bit hand optimization!

File: strstr_SHORT-SHOWDOWN_Microsoft_Ox_32bit.exe
Testbed: Windows XP 32bit; Microsoft 2010 32bit compiler; T7500 2200MHz dual DDR2
Target(as-one-line): OSHO.TXT 206908949 bytes

Pattern: fastest fox
Railgun_Quadruplet_7sun:         1683KB/clock / 0775%, 26672940 iterations
Railgun_Quadruplet_7Trident:     1888KB/clock / 0993%, 20834332 iterations
BNDM_32:                         1135KB/clock / 1038%, 19926393 iterations
Railgun_Quadruplet_7Gulliver:    1942KB/clock / 0977%, 21166516 iterations
Railgun_Quadruplet_7Hasherezade: 1757KB/clock / 0980%, 21112673 iterations
 
Pattern: fastest fox with biggest strides
Railgun_Quadruplet_7sun:         2658KB/clock / 1584%, 13060463 iterations
Railgun_Quadruplet_7Trident:     3015KB/clock / 3058%, 06765272 iterations
BNDM_32:                         2806KB/clock / 3113%, 06644708 iterations
Railgun_Quadruplet_7Gulliver:    3157KB/clock / 2924%, 07074287 iterations
Railgun_Quadruplet_7Hasherezade: 3061KB/clock / 3041%, 06801754 iterations

Bottom line: Railgun_Quadruplet_7Trident is my choice for patterns up to 16 chars in haystacks of 32KB or more (for haystacks smaller than 32KB Railgun_Doublet takes over); it is to be used as the backbone of Railgun_r8_Mimino_x64, along with Railgun_Quadruplet_7Hasherezade for patterns longer than 16 chars.
*/
//
/*
Testbed: [File: OSHO.TXT 206,908,949 bytes; LinesEncountered: 2,459,508; Windows 7 64bit; E7500 2.93GHz dual DDR2]

Microsoft 2010 64bit compiler / Intel C++ 64 Compiler XE 12.1

Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 248KB/clock 238KB/clock
strstr_GNU_C_Library performance: 376KB/clock 296KB/clock
Railgun_Doublet performance: 421KB/clock 348KB/clock
BNDM_32 performance: 267KB/clock 241KB/clock

Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 260KB/clock 255KB/clock
strstr_GNU_C_Library performance: 400KB/clock 320KB/clock
Railgun_Doublet performance: 476KB/clock 397KB/clock
BNDM_32 performance: 290KB/clock 262KB/clock

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 318KB/clock 323KB/clock
strstr_GNU_C_Library performance: 564KB/clock 421KB/clock
Railgun_Doublet performance: 551KB/clock 464KB/clock
BNDM_32 performance: 462KB/clock 409KB/clock

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 233KB/clock 227KB/clock
strstr_GNU_C_Library performance: 346KB/clock 276KB/clock
Railgun_Doublet performance: 393KB/clock 328KB/clock
BNDM_32 performance: 255KB/clock 234KB/clock

Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 311KB/clock 315KB/clock
strstr_GNU_C_Library performance: 540KB/clock 417KB/clock
Railgun_Doublet performance: 550KB/clock 463KB/clock
BNDM_32 performance: 390KB/clock 352KB/clock

Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 258KB/clock 257KB/clock
strstr_GNU_C_Library performance: 395KB/clock 323KB/clock
Railgun_Doublet performance: 534KB/clock 452KB/clock
BNDM_32 performance: 379KB/clock 343KB/clock

Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 309KB/clock 314KB/clock
strstr_GNU_C_Library performance: 535KB/clock 415KB/clock
Railgun_Doublet performance: 553KB/clock 465KB/clock
BNDM_32 performance: 451KB/clock 402KB/clock

Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 261KB/clock 260KB/clock
strstr_GNU_C_Library performance: 401KB/clock 326KB/clock
Railgun_Doublet performance: 550KB/clock 463KB/clock
BNDM_32 performance: 395KB/clock 359KB/clock

Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 311KB/clock 314KB/clock
strstr_GNU_C_Library performance: 542KB/clock 415KB/clock
Railgun_Doublet performance: 559KB/clock 469KB/clock
BNDM_32 performance: 400KB/clock 367KB/clock

Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 281KB/clock 282KB/clock
strstr_GNU_C_Library performance: 455KB/clock 367KB/clock
Railgun_Doublet performance: 557KB/clock 468KB/clock
BNDM_32 performance: 433KB/clock 392KB/clock

Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 300KB/clock 304KB/clock
strstr_GNU_C_Library performance: 510KB/clock 405KB/clock
Railgun_Doublet performance: 553KB/clock 464KB/clock
BNDM_32 performance: 419KB/clock 383KB/clock

Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 287KB/clock 288KB/clock
strstr_GNU_C_Library performance: 468KB/clock 378KB/clock
Railgun_Doublet performance: 562KB/clock 470KB/clock
BNDM_32 performance: 420KB/clock 384KB/clock

Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 311KB/clock 315KB/clock
strstr_GNU_C_Library performance: 541KB/clock 415KB/clock
Railgun_Doublet performance: 568KB/clock 477KB/clock
BNDM_32 performance: 436KB/clock 398KB/clock

Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 287KB/clock 289KB/clock
strstr_GNU_C_Library performance: 469KB/clock 378KB/clock
Railgun_Doublet performance: 573KB/clock 479KB/clock
BNDM_32 performance: 447KB/clock 407KB/clock

Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 316KB/clock 320KB/clock
strstr_GNU_C_Library performance: 557KB/clock 418KB/clock
Railgun_Doublet performance: 587KB/clock 490KB/clock
BNDM_32 performance: 435KB/clock 400KB/clock

Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...

strstr_Microsoft performance: 291KB/clock 294KB/clock
strstr_GNU_C_Library performance: 482KB/clock 388KB/clock
Railgun_Doublet performance: 594KB/clock 494KB/clock
BNDM_32 performance: 441KB/clock 405KB/clock

Overview:

GNU Berg's performance (compiled with Microsoft 2010 64bit compiler): 376KB/clock+400KB/clock+564KB/clock+346KB/clock+540KB/clock+395KB/clock+535KB/clock+401KB/clock+542KB/clock+455KB/clock+510KB/clock+468KB/clock+541KB/clock+469KB/clock+557KB/clock+482KB/clock=7581
GNU Berg's performance (compiled with Intel C++ 64 Compiler XE 12.1): 296KB/clock+320KB/clock+421KB/clock+276KB/clock+417KB/clock+323KB/clock+415KB/clock+326KB/clock+415KB/clock+367KB/clock+405KB/clock+378KB/clock+415KB/clock+378KB/clock+418KB/clock+388KB/clock=5958

Railgun_Doublet performance (compiled with Microsoft 2010 64bit compiler): 421KB/clock+476KB/clock+551KB/clock+393KB/clock+550KB/clock+534KB/clock+553KB/clock+550KB/clock+559KB/clock+557KB/clock+553KB/clock+562KB/clock+568KB/clock+573KB/clock+587KB/clock+594KB/clock=8581
Railgun_Doublet performance (compiled with Intel C++ 64 Compiler XE 12.1): 348KB/clock+397KB/clock+464KB/clock+328KB/clock+463KB/clock+452KB/clock+465KB/clock+463KB/clock+469KB/clock+468KB/clock+464KB/clock+470KB/clock+477KB/clock+479KB/clock+490KB/clock+494KB/clock=7191

BNDM_32 performance (compiled with Microsoft 2010 64bit compiler): 267KB/clock+290KB/clock+462KB/clock+255KB/clock+390KB/clock+379KB/clock+451KB/clock+395KB/clock+400KB/clock+433KB/clock+419KB/clock+420KB/clock+436KB/clock+447KB/clock+435KB/clock+441KB/clock=6320
BNDM_32 performance (compiled with Intel C++ 64 Compiler XE 12.1): 241KB/clock+262KB/clock+409KB/clock+234KB/clock+352KB/clock+343KB/clock+402KB/clock+359KB/clock+367KB/clock+392KB/clock+383KB/clock+384KB/clock+398KB/clock+407KB/clock+400KB/clock+405KB/clock=5738

Summary for all 16 patterns:

GNU Berg's performance:      7581 Microsoft / 5958 Intel
Railgun_Doublet performance: 8581 Microsoft / 7191 Intel
BNDM_32 performance:         6320 Microsoft / 5738 Intel

Using Microsoft:
Railgun_Doublet is (8581-7581)/7581*100% = 13% faster than GNU Berg's
Railgun_Doublet is (8581-6320)/6320*100% = 35% faster than BNDM_32

Using Intel:
Railgun_Doublet is (7191-5958)/5958*100% = 20% faster than GNU Berg's
Railgun_Doublet is (7191-5738)/5738*100% = 25% faster than BNDM_32
*/






// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_8Triplet (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    register uint32_t  ulHashPattern;
    uint32_t ulHashTarget;
    uint32_t count;
    uint32_t countSTATIC;
//  unsigned long countRemainder;

/*
    const unsigned char SINGLET = *(char *)(pbPattern);
    const unsigned long Quadruplet2nd = SINGLET<<8;
    const unsigned long Quadruplet3rd = SINGLET<<16;
    const unsigned long Quadruplet4th = SINGLET<<24;
*/
    unsigned char SINGLET;
    uint32_t Quadruplet2nd;
    uint32_t Quadruplet3rd;
    uint32_t Quadruplet4th;

    uint32_t  AdvanceHopperGrass;

    long i; //BMH needed
//    int a, j, bm_bc[ASIZE]; //BMH needed
//    unsigned char ch; //BMH needed
//    unsigned char lastch, firstch; //BMH needed

    if (cbPattern > cbTarget)
        return(NULL);

// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
//        countSTATIC = cbPattern-2;


/* 
8short:

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/496
strstr_GNU_C_Library performance: 402KB/clock
Railgun_Quadruplet_8Short_hits/Railgun_Quadruplet_8Short_clocks: 0/504
Railgun_Quadruplet_8Short performance: 396KB/clock

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/521
strstr_GNU_C_Library performance: 254KB/clock
Railgun_Quadruplet_8Short_hits/Railgun_Quadruplet_8Short_clocks: 1192002/489
Railgun_Quadruplet_8Short performance: 271KB/clock

8Triplet:

Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/504
strstr_GNU_C_Library performance: 396KB/clock
Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 0/471
Railgun_Quadruplet_8Triplet performance: 423KB/clock

Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...

strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/528
strstr_GNU_C_Library performance: 251KB/clock
Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: 1192002/479
Railgun_Quadruplet_8Triplet performance: 277KB/clock

*/
if ( cbPattern==3) {
    for ( ;; )
    {
        if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
         if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) return((pbTarget-3));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) { 
		pbTarget++;
	        //if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;
	        if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++; // r8Triplet
	}
        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else {
}
    for ( ;; )
    {
        // The line below gives for 'cbPattern'>=1:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
        // Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
        if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
            return((long)(pbTarget-cbPattern));
*/

        // The fragment below gives for 'cbPattern'>=2:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
        // Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock

/*
//For 2 and 3 [
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
//         count = countSTATIC;
         count = cbPattern-2;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
         while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/


        if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
            return((pbTarget-2));
        if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;


        // The fragment below gives for 'cbPattern'>=2:
	// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
	// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
         count = countSTATIC>>2;
         countRemainder = countSTATIC % 4;

         while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
               count--;
         }
	 //if (count == 0) {  // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
         while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
               countRemainder--;
         }
         //if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
         if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
         //}
        }
*/

        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if ( cbPattern<4)
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = *(unsigned long *)(pbPattern);
//        countSTATIC = cbPattern-1;

    //SINGLET = *(char *)(pbPattern);
    SINGLET = ulHashPattern & 0xFF;
    Quadruplet2nd = SINGLET<<8;
    Quadruplet3rd = SINGLET<<16;
    Quadruplet4th = SINGLET<<24;

    for ( ;; )
    {
	AdvanceHopperGrass = 0;
	ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);

        if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
//         count = countSTATIC;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
//	       if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
//               count--;
//         }
         count = cbPattern-1;
         while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
	       if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        } else { // The goal here: to avoid memory accesses by stressing the registers.
    if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
         AdvanceHopperGrass++;
         if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
              AdvanceHopperGrass++;
              if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
         }
    }
	}

	AdvanceHopperGrass++;

	pbTarget = pbTarget + AdvanceHopperGrass;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]

// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Railgun_Mischa_8Triplet: locates the first occurrence of a byte pattern inside a byte buffer.
//   pbTarget  - buffer to search; only its first cbTarget bytes are ever read (no NUL terminator needed).
//   pbPattern - bytes to look for.
//   cbTarget  - number of searchable bytes at pbTarget.
//   cbPattern - number of bytes at pbPattern.
// Returns a pointer to the first match inside pbTarget, or NULL when there is none.
// An empty pattern matches at pbTarget (the strstr convention); a 1-byte pattern is handed to memchr.
// Strategy by pattern length:
//   2 bytes  - pair compare, hopping over the next window when it cannot start with the first pattern byte;
//   3 bytes  - the '8Triplet' scan: as above, but up to two following windows are hopped over;
//   4+ bytes - Mischa Sandberg's rolling 32-bit window (http://mischasan.wordpress.com), here bounded
//              by cbTarget instead of by a terminating NUL.
char * Railgun_Mischa_8Triplet (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    // All comparisons are done on unsigned bytes so that values above 0x7F behave like any other byte.
    const unsigned char * const hay = (const unsigned char *)pbTarget;
    const unsigned char * const needle = (const unsigned char *)pbPattern;
    unsigned long lastStart; // highest offset at which a match can still begin
    unsigned long pos;       // offset of the window currently being examined

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);
    if (cbPattern == 1)
        return((char *)memchr(pbTarget, needle[0], (size_t)cbTarget));

    lastStart = cbTarget - cbPattern;

    if (cbPattern == 2) {
        const unsigned char first = needle[0];
        const unsigned char second = needle[1];

        pos = 0;
        while (pos <= lastStart) {
            if (hay[pos] == first && hay[pos+1] == second)
                return(pbTarget + pos);
            // The window's second byte would be the next window's first byte:
            // when it differs from the pattern's first byte, that window is hopeless.
            pos += (hay[pos+1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbPattern == 3) {
        const unsigned char first = needle[0];
        const unsigned char middle = needle[1];
        const unsigned char last = needle[2];

        pos = 0;
        while (pos <= lastStart) {
            // Outer bytes first (the cheap 'hash'), the middle byte only when they agree.
            if (hay[pos] == first && hay[pos+2] == last && hay[pos+1] == middle)
                return(pbTarget + pos);
            // r8Triplet: resume at the nearest following byte that equals the pattern's first byte,
            // or right behind the current window when neither of its two trailing bytes does.
            if (hay[pos+1] == first)
                pos += 1;
            else if (hay[pos+2] == first)
                pos += 2;
            else
                pos += 3;
        }
        return(NULL);
    }

    // cbPattern >= 4: slide a big-endian 32-bit window over the target, one byte per step.
    {
        const uint32_t head = ((uint32_t)needle[0] << 24) | ((uint32_t)needle[1] << 16)
                            | ((uint32_t)needle[2] << 8)  |  (uint32_t)needle[3];
        const size_t tail = (size_t)(cbPattern - 4); // pattern bytes not covered by 'head'
        // Preload three bytes; every iteration shifts in the fourth byte of the window at 'pos'.
        uint32_t window = ((uint32_t)hay[0] << 16) | ((uint32_t)hay[1] << 8) | (uint32_t)hay[2];

        for (pos = 0; pos <= lastStart; pos++) {
            window = (window << 8) | (uint32_t)hay[pos+3];
            // pos <= lastStart guarantees that the 'tail' bytes compared here lie inside the target.
            if (window == head && (tail == 0 || memcmp(hay + pos + 4, needle + 4, tail) == 0))
                return(pbTarget + pos);
        }
        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Railgun_Quadruplet_6ppMischa: locates the first occurrence of a byte pattern inside a byte buffer.
//   pbTarget  - buffer to search; only its first cbTarget bytes are ever read (no NUL terminator needed).
//   pbPattern - bytes to look for.
//   cbTarget  - number of searchable bytes at pbTarget.
//   cbPattern - number of bytes at pbPattern.
// Returns a pointer to the first match inside pbTarget, or NULL when there is none.
// An empty pattern matches at pbTarget (the strstr convention); a 1-byte pattern is handed to memchr.
// Strategy:
//   2 or 3 byte patterns             - direct compare with a one-window hop;
//   4+ byte patterns, cbTarget < 961 - Mischa Sandberg's rolling 32-bit window
//                                      (http://mischasan.wordpress.com), bounded by cbTarget;
//   4+ byte patterns, cbTarget >= 961 - Boyer-Moore-Horspool with a cheap first/last byte pre-check.
char * Railgun_Quadruplet_6ppMischa (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    enum {
        kByteValues = 256,      // one shift-table slot per possible unsigned char value
        kSmallTargetLimit = 961 // arbitrary cut-over picked by the original author: shorter targets skip the table setup
    };
    // All comparisons are done on unsigned bytes so that values above 0x7F behave like any other byte
    // (a plain char could be negative and index in front of the shift table).
    const unsigned char * const hay = (const unsigned char *)pbTarget;
    const unsigned char * const needle = (const unsigned char *)pbPattern;
    unsigned long shift[kByteValues]; // Horspool bad-character shifts
    unsigned long lastStart;          // highest offset at which a match can still begin
    unsigned long pos;                // offset of the window currently being examined
    unsigned long k;

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);
    if (cbPattern == 1)
        return((char *)memchr(pbTarget, needle[0], (size_t)cbTarget));

    lastStart = cbTarget - cbPattern;

    if (cbPattern == 2) {
        const unsigned char first = needle[0];
        const unsigned char second = needle[1];

        pos = 0;
        while (pos <= lastStart) {
            if (hay[pos] == first && hay[pos+1] == second)
                return(pbTarget + pos);
            // The window's second byte would be the next window's first byte.
            pos += (hay[pos+1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbPattern == 3) {
        const unsigned char first = needle[0];
        const unsigned char middle = needle[1];
        const unsigned char last = needle[2];

        pos = 0;
        while (pos <= lastStart) {
            // Outer bytes first (the cheap 'hash'), the middle byte only when they agree.
            if (hay[pos] == first && hay[pos+2] == last && hay[pos+1] == middle)
                return(pbTarget + pos);
            pos += (hay[pos+1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbTarget < kSmallTargetLimit) {
        // Slide a big-endian 32-bit window over the target, one byte per step.
        const uint32_t head = ((uint32_t)needle[0] << 24) | ((uint32_t)needle[1] << 16)
                            | ((uint32_t)needle[2] << 8)  |  (uint32_t)needle[3];
        const size_t tail = (size_t)(cbPattern - 4); // pattern bytes not covered by 'head'
        // Preload three bytes; every iteration shifts in the fourth byte of the window at 'pos'.
        uint32_t window = ((uint32_t)hay[0] << 16) | ((uint32_t)hay[1] << 8) | (uint32_t)hay[2];

        for (pos = 0; pos <= lastStart; pos++) {
            window = (window << 8) | (uint32_t)hay[pos+3];
            // pos <= lastStart guarantees that the 'tail' bytes compared here lie inside the target.
            if (window == head && (tail == 0 || memcmp(hay + pos + 4, needle + 4, tail) == 0))
                return(pbTarget + pos);
        }
        return(NULL);
    }

    /* Preprocessing: how far the window may move, keyed by the byte under its last position. */
    for (k = 0; k < (unsigned long)kByteValues; k++)
        shift[k] = cbPattern;
    for (k = 0; k + 1 < cbPattern; k++)
        shift[needle[k]] = cbPattern - 1 - k;

    /* Searching */
    pos = 0;
    while (pos <= lastStart) {
        const unsigned char ch = hay[pos + cbPattern - 1];

        // Idea borrowed from Karp-Rabin: run the slower compare only when the window "looks like" the pattern.
        if (ch == needle[cbPattern-1] && hay[pos] == needle[0]
            && memcmp(hay + pos + 1, needle + 1, (size_t)(cbPattern - 2)) == 0)
            return(pbTarget + pos);
        pos += shift[ch]; // never exceeds cbTarget: pos <= lastStart and shift[ch] <= cbPattern
    }
    return(NULL);
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]



// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Railgun_Quadruplet_6pp: locates the first occurrence of a byte pattern inside a byte buffer.
//   pbTarget  - buffer to search; only its first cbTarget bytes are ever read.
//   pbPattern - bytes to look for.
//   cbTarget  - number of searchable bytes at pbTarget.
//   cbPattern - number of bytes at pbPattern.
// Returns a pointer to the first match inside pbTarget, or NULL when there is none.
// An empty pattern matches at pbTarget (the strstr convention); a 1-byte pattern is handed to memchr.
// Strategy:
//   2 or 3 byte patterns              - direct compare with a one-window hop;
//   4+ byte patterns, cbTarget < 961  - 'Quadruplet' scan: compare the first four bytes, then hop over
//                                       every following byte that cannot start a match;
//   4+ byte patterns, cbTarget >= 961 - Boyer-Moore-Horspool with a cheap first/last byte pre-check.
char * Railgun_Quadruplet_6pp (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    enum {
        kByteValues = 256,      // one shift-table slot per possible unsigned char value
        kSmallTargetLimit = 961 // arbitrary cut-over picked by the original author: shorter targets skip the table setup
    };
    // All comparisons are done byte-wise on unsigned bytes: no dependence on sizeof(long),
    // on byte order, on alignment or on the signedness of plain char.
    const unsigned char * const hay = (const unsigned char *)pbTarget;
    const unsigned char * const needle = (const unsigned char *)pbPattern;
    unsigned long shift[kByteValues]; // Horspool bad-character shifts
    unsigned long lastStart;          // highest offset at which a match can still begin
    unsigned long pos;                // offset of the window currently being examined
    unsigned long k;

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);
    if (cbPattern == 1)
        return((char *)memchr(pbTarget, needle[0], (size_t)cbTarget));

    lastStart = cbTarget - cbPattern;

    if (cbPattern == 2) {
        const unsigned char first = needle[0];
        const unsigned char second = needle[1];

        pos = 0;
        while (pos <= lastStart) {
            if (hay[pos] == first && hay[pos+1] == second)
                return(pbTarget + pos);
            // The window's second byte would be the next window's first byte.
            pos += (hay[pos+1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbPattern == 3) {
        const unsigned char first = needle[0];
        const unsigned char middle = needle[1];
        const unsigned char last = needle[2];

        pos = 0;
        while (pos <= lastStart) {
            // Outer bytes first (the cheap 'hash'), the middle byte only when they agree.
            if (hay[pos] == first && hay[pos+2] == last && hay[pos+1] == middle)
                return(pbTarget + pos);
            pos += (hay[pos+1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbTarget < kSmallTargetLimit) {
        const unsigned char first = needle[0];

        pos = 0;
        while (pos <= lastStart) {
            const unsigned char * const win = hay + pos;
            unsigned long hop = 1; // distance to the next window worth examining

            if (memcmp(win, needle, 4) == 0) {
                unsigned long matched = 4; // leading window bytes known to equal the pattern

                while (matched < cbPattern && win[matched] == needle[matched])
                    matched++;
                if (matched == cbPattern)
                    return(pbTarget + pos);
                // Among the bytes already inspected, any that differs from the pattern's
                // first byte cannot begin a match, so the scan resumes behind them.
                while (hop < matched && win[hop] != first)
                    hop++;
            } else {
                // Same hop rule, limited to the quadruplet that was just compared.
                while (hop < 4 && win[hop] != first)
                    hop++;
            }
            pos += hop; // never exceeds cbTarget: hop <= cbPattern and pos <= lastStart
        }
        return(NULL);
    }

    /* Preprocessing: how far the window may move, keyed by the byte under its last position. */
    for (k = 0; k < (unsigned long)kByteValues; k++)
        shift[k] = cbPattern;
    for (k = 0; k + 1 < cbPattern; k++)
        shift[needle[k]] = cbPattern - 1 - k;

    /* Searching */
    pos = 0;
    while (pos <= lastStart) {
        const unsigned char ch = hay[pos + cbPattern - 1];

        // Idea borrowed from Karp-Rabin: run the slower compare only when the window "looks like" the pattern.
        if (ch == needle[cbPattern-1] && hay[pos] == needle[0]
            && memcmp(hay + pos + 1, needle + 1, (size_t)(cbPattern - 2)) == 0)
            return(pbTarget + pos);
        pos += shift[ch]; // never exceeds cbTarget: pos <= lastStart and shift[ch] <= cbPattern
    }
    return(NULL);
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_6pp_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    register unsigned long  ulHashPattern;
    unsigned long ulHashTarget;
    //unsigned long count; //r.6+
    signed long count;
    //unsigned long countSTATIC; //r.6+
    signed long countSTATIC;
//  unsigned long countRemainder;

/*
    const unsigned char SINGLET = *(char *)(pbPattern);
    const unsigned long Quadruplet2nd = SINGLET<<8;
    const unsigned long Quadruplet3rd = SINGLET<<16;
    const unsigned long Quadruplet4th = SINGLET<<24;
*/
    unsigned char SINGLET;
    unsigned long Quadruplet2nd;
    unsigned long Quadruplet3rd;
    unsigned long Quadruplet4th;

    unsigned long  AdvanceHopperGrass;

    long i; //BMH needed
    int a, j, bm_bc[ASIZE]; //BMH needed
    unsigned char ch; //BMH needed
//    unsigned char lastch, firstch; //BMH needed

    if (cbPattern > cbTarget)
        return(NULL);

// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
//        countSTATIC = cbPattern-2;

if ( cbPattern==3) {
    for ( ;; )
    {
        if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
         if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else {
}
    for ( ;; )
    {
        // The line below gives for 'cbPattern'>=1:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
        // Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
        if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
            return((long)(pbTarget-cbPattern));
*/

        // The fragment below gives for 'cbPattern'>=2:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
        // Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock

/*
//For 2 and 3 [
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
//         count = countSTATIC;
         count = cbPattern-2;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
         while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/


        if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
            Railgunhits++; //return((pbTarget-2));
        if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;


        // The fragment below gives for 'cbPattern'>=2:
	// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
	// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
         count = countSTATIC>>2;
         countRemainder = countSTATIC % 4;

         while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
               count--;
         }
	 //if (count == 0) {  // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
         while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
               countRemainder--;
         }
         //if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
         if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
         //}
        }
*/

        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = *(unsigned long *)(pbPattern);
//        countSTATIC = cbPattern-1;

    //SINGLET = *(char *)(pbPattern);
    SINGLET = ulHashPattern & 0xFF;
    Quadruplet2nd = SINGLET<<8;
    Quadruplet3rd = SINGLET<<16;
    Quadruplet4th = SINGLET<<24;

    for ( ;; )
    {
	AdvanceHopperGrass = 0;
	ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);

        if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
//         count = countSTATIC;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
//	       if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
//               count--;
//         }
         count = cbPattern-1;
         while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
	       if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
               count--;
         }
         if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
        } else { // The goal here: to avoid memory accesses by stressing the registers.
    if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
         AdvanceHopperGrass++;
         if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
              AdvanceHopperGrass++;
              if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
         }
    }
	}

	AdvanceHopperGrass++;

	pbTarget = pbTarget + AdvanceHopperGrass;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if (cbTarget<961)
        //countSTATIC = cbPattern-2; //r.6+
        //countSTATIC = cbPattern-2-3;
        countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfittable in code cache lines?!
        ulHashPattern = *(unsigned long *)(pbPattern);
// Next line fixes the BUG from r.6++: but with awful speed degradation! So the bug is fixed in the definitions by setting 'countSTATIC = cbPattern-2-2;', bug appears only for patterns with lengths of 4, The setback is one unnecessary comparison for 5bytes patterns, stupidly such setback exists (from before) for 4bytes as well.
//if (countSTATIC<0) countSTATIC=0;
    /* Preprocessing */
    for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
    for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;

    /* Searching */
    //lastch=pbPattern[cbPattern-1];
    //firstch=pbPattern[0];
    i=0;
    while (i <= cbTarget-cbPattern) {
       ch=pbTarget[i+cbPattern-1];
       //if (ch ==lastch)
          //if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
          //if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i);  // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
          //if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0]) // r.6+
          //if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == *(long *)&pbPattern[0]) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
          if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
             {
         count = countSTATIC;
         //while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) { // r.6+
// A BUG (in next line) crushed from r.6++: 'count !=0' becomes 'count >0' in r.6+++ but with awful speed degradation! So the bug is fixed outside the cycles by setting 'countSTATIC' from -1 to 0, bug appears only for patterns with lengths of 4.
         while ( count !=0 && *(char *)(pbPattern+1+3+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+3+(countSTATIC-count)) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count <= 0) Railgunhits++; //return(pbTarget+i);
	     }
       i+=bm_bc[ch];
    }
    return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Note: pattern lengths 0 and 1 are handled inside the short-pattern branch (so the >=4 paths pay
// nothing for them): an empty pattern matches at the start, a single byte is delegated to memchr().

// Assembles four consecutive bytes into a 32-bit value with the first byte in the low 8 bits.
// Building the value from individual bytes keeps the result independent of the width of 'long',
// of host byte order, of pointer alignment and of aliasing rules.
static uint32_t Railgun7m_LoadQuad (const unsigned char * p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/*
 * Railgun_Quadruplet_7m: finds the first occurrence of a byte pattern inside a byte buffer.
 *
 * Parameters:
 *   pbTarget  - start of the buffer to search (cbTarget bytes are readable).
 *   pbPattern - start of the pattern (cbPattern bytes are readable).
 *   cbTarget  - number of bytes in the buffer.
 *   cbPattern - number of bytes in the pattern.
 *
 * Returns a pointer to the first match inside pbTarget, or NULL when there is none
 * (including when the pattern is longer than the buffer). An empty pattern yields pbTarget.
 *
 * Strategy, chosen by size:
 *   cbPattern 0..1          : trivial cases.
 *   cbPattern 2..3          : unrolled first/last byte check with a one-byte look-ahead skip.
 *   cbPattern >= 4, small   : compare the first four bytes as one 32-bit value and hop over
 *                             positions that cannot start a match (no preprocessing).
 *   cbPattern >= 4, large   : Boyer-Moore-Horspool whose quick check is the LAST four bytes
 *                             of the window taken as one 32-bit value.
 *
 * All bytes are treated as unsigned and every window start stays within 0..cbTarget-cbPattern,
 * so no byte outside the two buffers is read.
 */
char * Railgun_Quadruplet_7m (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    // Below this buffer size the table-free scan is used; the value is the author's empirical choice.
    const unsigned long smallTargetLimit = 961;
    const unsigned char * target  = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned long lastStart;   // greatest offset at which a window still fits
    unsigned long pos = 0;     // offset of the current window start

    if (cbPattern > cbTarget)
        return(NULL);
    lastStart = cbTarget - cbPattern;

    if (cbPattern < 4) {
        unsigned char first;
        unsigned char last;

        if (cbPattern == 0)
            return(pbTarget);
        if (cbPattern == 1)
            return((char *)memchr(pbTarget, pattern[0], cbTarget));

        first = pattern[0];
        last  = pattern[cbPattern-1];

        if (cbPattern == 3) {
            const unsigned char middle = pattern[1];
            while (pos <= lastStart) {
                const unsigned char * w = target + pos;
                if (w[0] == first && w[2] == last && w[1] == middle)
                    return(pbTarget + pos);
                // If the next byte cannot start a match, step over it as well.
                pos += (w[1] != first) ? 2 : 1;
            }
            return(NULL);
        }

        // cbPattern == 2
        while (pos <= lastStart) {
            const unsigned char * w = target + pos;
            if (w[0] == first && w[1] == last)
                return(pbTarget + pos);
            pos += (w[1] != first) ? 2 : 1;
        }
        return(NULL);
    }

    if (cbTarget < smallTargetLimit) {
        const uint32_t head = Railgun7m_LoadQuad(pattern);
        const unsigned char first = pattern[0];
        // The first pattern byte shifted into byte lanes 1..3, so the hop can be decided
        // from the already loaded 32-bit window without further memory reads.
        const uint32_t lane1 = (uint32_t)first << 8;
        const uint32_t lane2 = (uint32_t)first << 16;
        const uint32_t lane3 = (uint32_t)first << 24;

        while (pos <= lastStart) {
            const unsigned char * w = target + pos;
            const uint32_t window = Railgun7m_LoadQuad(w);
            unsigned long hop = 0;   // extra positions that provably cannot start a match

            if (window == head) {
                // Bytes 1..3 are known equal already; they are revisited only to compute 'hop'.
                unsigned long k = 1;
                while (k < cbPattern && pattern[k] == w[k]) {
                    // Extend the hop while every byte seen so far (after the first) differs
                    // from the first pattern byte.
                    if (hop == k-1 && w[k] != first) hop++;
                    k++;
                }
                if (k == cbPattern)
                    return(pbTarget + pos);
            } else {
                if ((window & 0x0000FF00u) != lane1) {
                    hop++;
                    if ((window & 0x00FF0000u) != lane2) {
                        hop++;
                        if ((window & 0xFF000000u) != lane3) hop++;
                    }
                }
            }
            pos += hop + 1;
        }
        return(NULL);
    }

    {
        unsigned long skip[256];   // Horspool bad-character shifts, indexed by unsigned byte value
        const unsigned long prefixLen = cbPattern - 4;   // bytes not covered by the 4-byte tail
        const uint32_t tail = Railgun7m_LoadQuad(pattern + prefixLen);
        unsigned long a;

        /* Preprocessing */
        for (a = 0; a < 256; a++) skip[a] = cbPattern;
        for (a = 0; a + 1 < cbPattern; a++) skip[pattern[a]] = cbPattern - a - 1;

        /* Searching */
        while (pos <= lastStart) {
            const unsigned char * w = target + pos;
            // Quick check on the last four bytes, the slower comparison only when the window "looks like".
            if (Railgun7m_LoadQuad(w + prefixLen) == tail && memcmp(w, pattern, prefixLen) == 0)
                return(pbTarget + pos);
            pos += skip[w[cbPattern-1]];
        }
        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]



// Revision: 2, 2012-Jan-30, the main disadvantage: the preprocessing overhead.
// Note: pattern lengths 0 and 1 are handled inside the short-pattern branch: an empty pattern
// matches at the start of the target, a single byte is delegated to memchr().
// 

// Assembles four consecutive bytes into a 32-bit value with the first byte in the low 8 bits;
// the result does not depend on the width of 'long', host byte order or pointer alignment.
static uint32_t Gulliver7_LoadQuad (const unsigned char * p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

// Maps two consecutive bytes to an index in 0..65535 for the order-2 table.
static unsigned int Gulliver7_LoadPair (const unsigned char * p)
{
	return (unsigned int)p[0] | ((unsigned int)p[1] << 8);
}

/*
 * Railgun_Quadruplet_7Gulliver: finds the first occurrence of a byte pattern inside a byte buffer.
 *
 * Parameters:
 *   pbTarget  - start of the buffer to search (cbTarget bytes are readable).
 *   pbPattern - start of the pattern (cbPattern bytes are readable).
 *   cbTarget  - number of bytes in the buffer.
 *   cbPattern - number of bytes in the pattern.
 *
 * Returns a pointer to the first match inside pbTarget, or NULL when there is none
 * (including when the pattern is longer than the buffer). An empty pattern yields pbTarget.
 *
 * Side effect: in the large-target branch the file-level counter GlobalI is incremented once
 * per window that was examined without producing a match.
 *
 * Strategy, chosen by size:
 *   cbPattern 0..1                  : trivial cases.
 *   cbPattern 2..3                  : unrolled first/last byte check with a look-ahead skip.
 *   cbPattern >= 4, cbTarget < 961  : rolling 4-byte window (after Mischa Sandberg's 'scanstrm',
 *                                     http://mischasan.wordpress.com), bounded by cbTarget.
 *   cbPattern 11..256, large target : 'Gulliver' order-2 Horspool (shift by the position of the
 *                                     window's last byte PAIR), falling back to the larger of the
 *                                     Horspool and Sunday shifts when the pair is the pattern suffix.
 *   any other large-target case     : Sunday shifts only. Patterns longer than 256 bytes take this
 *                                     path because the order-2 table stores positions in one byte.
 *
 * All bytes are treated as unsigned and no byte outside the two buffers is read.
 */
char * Railgun_Quadruplet_7Gulliver (char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
	// Below this buffer size the table-free scan is used; the value is the author's empirical choice.
	const unsigned long smallTargetLimit = 961;
	const unsigned char * target  = (const unsigned char *)pbTarget;
	const unsigned char * pattern = (const unsigned char *)pbPattern;
	unsigned long lastStart;   // greatest offset at which a window still fits
	unsigned long i = 0;       // offset of the current window start

	if (cbPattern > cbTarget)
		return(NULL);
	lastStart = cbTarget - cbPattern;

	if (cbPattern < 4) {
		unsigned char first;
		unsigned char last;

		if (cbPattern == 0)
			return(pbTarget);
		if (cbPattern == 1)
			return((char *)memchr(pbTarget, pattern[0], cbTarget));

		first = pattern[0];
		last  = pattern[cbPattern-1];

		if (cbPattern == 3) {
			const unsigned char middle = pattern[1];
			while (i <= lastStart) {
				const unsigned char * w = target + i;
				if (w[0] == first && w[2] == last && w[1] == middle)
					return(pbTarget + i);
				// If the next byte cannot start a match, step over it as well.
				i += (w[1] != first) ? 2 : 1;
			}
			return(NULL);
		}

		// cbPattern == 2
		while (i <= lastStart) {
			const unsigned char * w = target + i;
			if (w[0] == first && w[1] == last)
				return(pbTarget + i);
			i += (w[1] != first) ? 2 : 1;
		}
		return(NULL);
	}

	if (cbTarget < smallTargetLimit) {
		// Rolling window: the four most recent bytes, oldest byte in the top 8 bits.
		const uint32_t head = ((uint32_t)pattern[0] << 24) | ((uint32_t)pattern[1] << 16)
		                    | ((uint32_t)pattern[2] << 8)  |  (uint32_t)pattern[3];
		uint32_t wind = ((uint32_t)target[0] << 16) | ((uint32_t)target[1] << 8) | (uint32_t)target[2];

		for (i = 0; i <= lastStart; i++) {
			// Shifting a 32-bit value drops the byte that leaves the window.
			wind = (uint32_t)((wind << 8) | target[i+3]);
			if (wind == head && memcmp(target + i + 4, pattern + 4, cbPattern - 4) == 0)
				return(pbTarget + i);
		}
		return(NULL);
	}

	{
		unsigned long skipHorspool[256];   // shift keyed by the LAST byte of the window
		unsigned long skipSunday[256];     // shift keyed by the byte just AFTER the window
		const uint32_t head = Gulliver7_LoadQuad(pattern);   // first four bytes
		const unsigned long restLen = cbPattern - 4;         // bytes following the first four
		unsigned long j;

		for (j = 0; j < 256; j++) { skipHorspool[j] = cbPattern; skipSunday[j] = cbPattern + 1; }
		for (j = 0; j + 1 < cbPattern; j++) skipHorspool[pattern[j]] = cbPattern - j - 1;
		for (j = 0; j < cbPattern; j++) skipSunday[pattern[j]] = cbPattern - j;

		if (cbPattern > 10 && cbPattern <= 256) {
			// Elsiane r.2: for every byte pair, the rightmost position at which it occurs in the
			// pattern; cbPattern-1 marks "pair absent". Both values fit in a byte here.
			unsigned char order2[256*256];

			memset(order2, (int)(cbPattern - 1), sizeof order2);
			// alfalfa: 7 long, 6 pairs (al lf fa al lf fa), 3 distinct pairs (al lf fa)
			for (j = 0; j + 1 < cbPattern; j++) order2[Gulliver7_LoadPair(pattern + j)] = (unsigned char)j;

			// 'i < lastStart' keeps the byte after the window (needed by Sunday) inside the target.
			while (i < lastStart) {
				const unsigned char * w = target + i;
				const unsigned long Gulliver = order2[Gulliver7_LoadPair(w + cbPattern - 2)];

				if ( Gulliver == cbPattern-2 ) { // CASE #1: the window ends with the pattern's last pair
					unsigned long h;
					unsigned long s;
					if (Gulliver7_LoadQuad(w) == head && memcmp(w + 4, pattern + 4, restLen) == 0)
						return(pbTarget + i);
					// 'SunHorse': take the larger of the two safe shifts.
					h = skipHorspool[w[cbPattern-1]];
					s = skipSunday[w[cbPattern]];
					i += (h < s) ? s : h;
				} else if ( Gulliver == cbPattern-1 ) { // CASE #2: the pair does not occur in the pattern
					i += Gulliver; // skip the whole pattern but fall back one byte (order 2)
				} else { // CASE #3: the pair occurs, but not as the suffix
					i += cbPattern - Gulliver - 2; // align with its rightmost occurrence
				}

// fa af fa af fa as st  Order 2 Horspool
//  0  1  2  3  4  5  6
// HIKARIfast
// fafafast
//   fafafast +2 = (cbPattern-Gulliver-2 = 8-4-2 = 2) Order 2 'fa' vs 'st' i.e. CASE #3
//
// lo on ng gp pa ac ce  Order 2 Horspool
//  0  1  2  3  4  5  6
// HIKARIfast
// longpace
//        longpace +7 = (cbPattern-1 = 8-1 = 7) Order 2 'fa' vs 'ce' i.e. CASE #2

				GlobalI++;
			}
		} else {
			while (i < lastStart) {
				const unsigned char * w = target + i;
				if (Gulliver7_LoadQuad(w) == head && memcmp(w + 4, pattern + 4, restLen) == 0)
					return(pbTarget + i);
				i += skipSunday[w[cbPattern]];
				GlobalI++;
			}
		}

		// The very last window has no following byte, so it is checked separately.
		if (i == lastStart) {
			const unsigned char * w = target + i;
			if (Gulliver7_LoadQuad(w) == head && memcmp(w + 4, pattern + 4, restLen) == 0)
				return(pbTarget + i);
			GlobalI++;
		}

		return(NULL);
	}
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]



// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm [
/*
Tuning continues but the skeleton is built, I see 'Gulliver' as a really High-Performance etude.
And not to be empty-handed here the Gulliver's swiftness is benchmarked on String(206,908,949bytes) as-one-line:

Pattern: fast
Railgun_Quadruplet_7sun performance:      1057KB/clock / 456%, 45330622 skips/iterations
Railgun_Quadruplet_7 performance:         0976KB/clock / 377%, 54788054 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 0649KB/clock / 480%, 43103056 skips/iterations
Railgun_Quadruplet_7deuce performance:    0564KB/clock / 389%, 53138919 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  0505KB/clock / 551%, 37541955 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 0780KB/clock / 300%, 68943184 skips/iterations
Boyer_Moore_Flensburg performance:        0486KB/clock / 377%, 54788139 skips/iterations

Pattern: faster
Railgun_Quadruplet_7sun performance:      1356KB/clock / 591%, 34996936 skips/iterations
Railgun_Quadruplet_7 performance:         1320KB/clock / 514%, 40194194 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 0771KB/clock / 656%, 31504148 skips/iterations
Railgun_Quadruplet_7deuce performance:    0651KB/clock / 567%, 36434006 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  0535KB/clock / 710%, 29101626 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 1195KB/clock / 498%, 41544613 skips/iterations
Boyer_Moore_Flensburg performance:        0684KB/clock / 514%, 40194282 skips/iterations

Pattern: fastest
Railgun_Quadruplet_7sun performance:      1554KB/clock / 687%, 30084306 skips/iterations
Railgun_Quadruplet_7 performance:         1519KB/clock / 599%, 34540430 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 0918KB/clock / 761%, 27188853 skips/iterations
Railgun_Quadruplet_7deuce performance:    0771KB/clock / 663%, 31175827 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  0627KB/clock / 818%, 25281493 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 1453KB/clock / 595%, 34744153 skips/iterations
Boyer_Moore_Flensburg performance:        0792KB/clock / 621%, 33278240 skips/iterations

Pattern: fastest fox
Railgun_Quadruplet_7sun performance:      1712KB/clock / 775%, 26672940 skips/iterations
Railgun_Quadruplet_7 performance:         1669KB/clock / 669%, 30925578 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 0962KB/clock / 912%, 22663583 skips/iterations
Railgun_Quadruplet_7deuce performance:    0808KB/clock / 797%, 25945709 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  0719KB/clock / 931%, 22213101 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 2126KB/clock / 977%, 21166516 skips/iterations
Boyer_Moore_Flensburg performance:        1074KB/clock / 669%, 30925649 skips/iterations

Pattern: fastest fox with biggest strides
Railgun_Quadruplet_7sun performance:      2658KB/clock / 1584%, 13060463 skips/iterations
Railgun_Quadruplet_7 performance:         2767KB/clock / 1511%, 13689243 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 1820KB/clock / 2138%, 09677267 skips/iterations
Railgun_Quadruplet_7deuce performance:    1669KB/clock / 2053%, 10075650 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  1554KB/clock / 2143%, 09652548 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 3157KB/clock / 2924%, 07074287 skips/iterations  Stratosphere-borne!
Boyer_Moore_Flensburg performance:        1836KB/clock / 1554%, 13307181 skips/iterations

Pattern: fastest fox with biggest strides known to me
Railgun_Quadruplet_7sun performance:      2590KB/clock / 1548%, 13363356 skips/iterations
Railgun_Quadruplet_7 performance:         2694KB/clock / 1447%, 14292419 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 1924KB/clock / 2234%, 09259505 skips/iterations
Railgun_Quadruplet_7deuce performance:    1741KB/clock / 2011%, 10287584 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  1683KB/clock / 2240%, 09236188 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 3157KB/clock / 3832%, 05399116 skips/iterations  Mesosphere-borne!
Boyer_Moore_Flensburg performance:        1741KB/clock / 1540%, 13431751 skips/iterations

Pattern: fastest fox with biggest strides known to me up to 2012 January 26 namely 'Gulliver'
Railgun_Quadruplet_7sun performance:      3108KB/clock / 2890%, 07159321 skips/iterations
Railgun_Quadruplet_7 performance:         3108KB/clock / 2742%, 07545141 skips/iterations
Railgun_Quadruplet_7sunhorse performance: 2590KB/clock / 4138%, 04999777 skips/iterations
Railgun_Quadruplet_7deuce performance:    2464KB/clock / 4029%, 05135444 skips/iterations
Railgun_Quadruplet_7Elsiane performance:  2557KB/clock / 4141%, 04995463 skips/iterations
Railgun_Quadruplet_7Gulliver performance: 3157KB/clock / 7218%, 02866192 skips/iterations  Vacuum-borne!
Boyer_Moore_Flensburg performance:        2767KB/clock / 2745%, 07536097 skips/iterations
*/
// 
// Revision: 2, 2012-Jan-30, the main disadvantage: the preprocessing overhead.
//
// Purpose: Counting variant of the 'Gulliver' searcher: instead of returning a match it adds one to the
//          global 'Railgunhits' for every window of pbTarget[0..cbTarget-1] that equals pbPattern[0..cbPattern-1].
// Params:  pbTarget/cbTarget   - haystack and its length in bytes.
//          pbPattern/cbPattern - needle and its length in bytes.
// Return:  Always NULL, the result is delivered through 'Railgunhits'.
// Side effects: for haystacks of 961 bytes or more (and needles of 4+ bytes) it also adds to 'GlobalSP' and 'GlobalI'
//          and prints one "Skip-Performance" line.
// Notes:   - 1-byte needles are counted with a plain byte scan, an empty needle counts nothing.
//          - The order 2 table holds 'unsigned char' positions, so needles are limited to 255 bytes.
//          - The 4-byte and 2-byte probes are done through pointer casts, i.e. the platform has to tolerate unaligned loads;
//            the 2nd/3rd/4th byte masks of the short-haystack loop assume little-endian byte order.
//          - The probes are 32bit ('uint32_t'): with 'unsigned long' they became 8 bytes wide on LP64 targets,
//            reading past short needles and failing to match them.
// 
char * Railgun_Quadruplet_7Gulliver_count_hits (char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
	char * pbTargetMax = pbTarget + cbTarget;
	uint32_t ulHashPattern;
	uint32_t ulHashTarget;
	signed long count;
	signed long countSTATIC;

	unsigned char SINGLET; // First byte of the pattern
	uint32_t Quadruplet2nd;
	uint32_t Quadruplet3rd;
	uint32_t Quadruplet4th;

	unsigned long  AdvanceHopperGrass;

	long i; //BMH needed
	long lastStart; // Offset of the last window that still fits into the haystack
	unsigned long a, j;
	unsigned int bm_bc[256]; //BMH needed
	unsigned int bm_bc2nd[256]; //BMS needed
	unsigned char bm_Horspool_Order2[256*256]; //BMHSS(Elsiane) needed, 'char' limits patterns to 255, if 'long' then table becomes 256KB, grrr.
	unsigned long Gulliver; // or unsigned char or unsigned short

	if (cbPattern > cbTarget)
		return(NULL);

	// Degenerate needles: nothing to count for an empty one, a byte scan for a single char.
	if (cbPattern == 0)
		return(NULL);
	if (cbPattern == 1) {
		for (a=0; a < cbTarget; a++)
			if ( pbTarget[a] == pbPattern[0] ) Railgunhits++;
		return(NULL);
	}

	SINGLET = (unsigned char)pbPattern[0];

	if ( cbPattern<4) {
		// Needles of 2 or 3 bytes: the 'hash' is first byte + last byte, both taken as unsigned so that
		// bytes >= 0x80 do not borrow from the first byte (that used to corrupt the skip test below).
		pbTarget = pbTarget+cbPattern;
		ulHashPattern = ( (uint32_t)SINGLET<<8 ) + (unsigned char)pbPattern[cbPattern-1];

		if ( cbPattern==3) {
			for ( ;; ) {
				if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-3]<<8 ) + (unsigned char)pbTarget[-1] ) {
					if ( pbPattern[1] == pbTarget[-2] ) Railgunhits++; //return((pbTarget-3));
				}
				// The next window starts at pbTarget-2: if that byte is not the first pattern byte the window is skipped.
				if ( SINGLET != (unsigned char)pbTarget[-2] ) pbTarget++;
				pbTarget++;
				if (pbTarget > pbTargetMax)
					return(NULL);
			}
		}
		for ( ;; ) {
			if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-2]<<8 ) + (unsigned char)pbTarget[-1] )
				Railgunhits++; //return((pbTarget-2));
			// The next window starts at pbTarget-1: skip it when it cannot start a match.
			if ( SINGLET != (unsigned char)pbTarget[-1] ) pbTarget++;
			pbTarget++;
			if (pbTarget > pbTargetMax)
				return(NULL);
		}
	}

	if (cbTarget<961) { // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
		// Short haystack: compare the first four bytes as one 32bit value; pbTarget points one past the current window.
		pbTarget = pbTarget+cbPattern;
		ulHashPattern = *(uint32_t *)(pbPattern);

		Quadruplet2nd = (uint32_t)SINGLET<<8;
		Quadruplet3rd = (uint32_t)SINGLET<<16;
		Quadruplet4th = (uint32_t)SINGLET<<24;

		for ( ;; ) {
			AdvanceHopperGrass = 0;
			ulHashTarget = *(uint32_t *)(pbTarget-cbPattern);

			if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
				count = cbPattern-1;
				while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
					// While the compared prefix contains no further copy of the first pattern byte the skip keeps growing.
					if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != (unsigned char)*(pbTarget-count) ) AdvanceHopperGrass++;
					count--;
				}
				if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
			} else { // The goal here: to avoid memory accesses by stressing the registers.
				// Bytes 2..4 of the window that differ from the first pattern byte cannot start a match.
				if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
					AdvanceHopperGrass++;
					if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
						AdvanceHopperGrass++;
						if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
					}
				}
			}

			AdvanceHopperGrass++;

			pbTarget = pbTarget + AdvanceHopperGrass;
			if (pbTarget > pbTargetMax)
				return(NULL);
		}
	}

	// Long haystack: order 2 Horspool ('Elsiane') backed up by Horspool/Sunday order 1 tables.
	countSTATIC = cbPattern-2-2;
	lastStart = (long)cbTarget - (long)cbPattern; // Signed, so that cbTarget==cbPattern does not wrap the loop bound

	// Bytes are used as table indexes, hence the 'unsigned char' casts (plain 'char' may be negative).
	for (a=0; a < 256; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=cbPattern+1;}
	for (j=0; j < cbPattern-1; j++) bm_bc[(unsigned char)pbPattern[j]]=cbPattern-j-1; 
	for (j=0; j < cbPattern; j++) bm_bc2nd[(unsigned char)pbPattern[j]]=cbPattern-j; 

	ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes

	AdvanceHopperGrass = 0; // In this branch it counts the main loop iterations
	i=0;

	// Elsiane r.2  [
	memset(bm_Horspool_Order2, (unsigned char)(cbPattern-1), sizeof(bm_Horspool_Order2)); // cbPattern-(Order-1) for Horspool

	// alfalfa 7 long 6 BBs (al lf fa al lf fa) 3 distinct BBs (al lf fa) 
	// fast 4 0-1-2 fa as st
	for (j=0; j < cbPattern-1; j++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+j)]=(unsigned char)j; // Rightmost appearance/position is needed
	// Elsiane r.2 ]

// 32323218 Order 1 Horspool Skip-table A
// 01234568 Order 1 Horspool Skip-table B
// fa af fa af fa as st Order 2 Horspool Skip-table B
//  0  1  2  3  4  5  6
// HIKARIfast
// fafafast
//   fafafast +2 Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-5-1 = 2) Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-4-2 = 2) Order 2 'fa' vs 'st' i.e. CASE #3

// 76543218 Order 1 Horspool
// lo on ng gp pa ac ce Order 2 Horspool
//  0  1  2  3  4  5  6
// HIKARIfast
// longpace
//   longpace +2 Order 1 'a' vs 'e'
//        longpace +7 = (cbPattern-(Order-1) = 8-(2-1) = 7) Order 2 'fa' vs 'ce' i.e. CASE #2

	while (i < lastStart) { // The last window is handled after the loop because Sunday reads one byte past the window
		Gulliver = bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]];

		if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found
			if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
				count = countSTATIC; // Last two chars already matched, to be fixed with -2
				while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
					count--;
				if ( count == 0) Railgunhits++; //return(pbTarget+i);
			}
			// 'SunHorse': take the bigger of the Horspool and the Sunday skips.
			if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]] )
				Gulliver =  bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
			else
				Gulliver =  bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
		} else if ( Gulliver != cbPattern-1 ) // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2
			Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position

		i = i + Gulliver;

		AdvanceHopperGrass++;
	}

	if (i == lastStart) { // The very last window, no byte after it to feed the Sunday table
		if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
			count = countSTATIC;
			while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
				count--;
			if ( count == 0) Railgunhits++; //return(pbTarget+i);
		}
		AdvanceHopperGrass++;
	}

	GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
	GlobalI += AdvanceHopperGrass;
	// 'AdvanceHopperGrass' is 'unsigned long', so it needs %lu (it was printed with %d).
	printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

	return(NULL);
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]



// NON-BUGGY [
/*
//Bad character preprocessing:
void bmInitocc()
{
    char a;
    int j;

    for (a=0; a<alphabetsize; a++)
        occ[a]=-1;

    for (j=0; j<m; j++)
    {
        a=p[j];
        occ[a]=j;
    }
}

//Good suffix preprocessing case 1:
void bmPreprocess1()
{
    int i=m, j=m+1;
    f[i]=j;
    while (i>0)
    {
        while (j<=m && p[i-1]!=p[j-1])
        {
            if (s[j]==0) s[j]=j-i;
            j=f[j];
        }
        i--; j--;
        f[i]=j;
    }
}

//Good suffix preprocessing case 2:
void bmPreprocess2()
{
    int i, j;
    j=f[0];
    for (i=0; i<=m; i++)
    {
        if (s[i]==0) s[i]=j;
        if (i==j) j=f[j];
    }
}

//Boyer-Moore preprocessing:
void bmPreprocess()
{
    int[] f=new int[m+1];
    bmInitocc();
    bmPreprocess1();
    bmPreprocess2();
}

//Boyer-Moore searching algorithm:
void bmSearch()
{
    int i=0, j;
    while (i<=n-m)
    {
        j=m-1;
        while (j>=0 && p[j]==t[i+j]) j--;
        if (j<0)
        {
            report(i);
            i+=s[0];
        }
        else 
            i+=Math.max(s[j+1], j-occ[t[i+j]]);
    }
}
*/


// Purpose: Classic Boyer-Moore (bad character + good suffix rules) used as a benchmark reference:
//          it adds one to the global 'Railgunhits' for every occurrence of p[0..m-1] in t[0..n-1].
// Params:  t/n - text and its length, p/m - pattern and its length; m must not exceed XSIZE (the tables are static sized).
// Return:  Always NULL; when m is negative or bigger than XSIZE nothing is searched, counted or printed.
// Side effects: adds to 'GlobalSP' and 'GlobalI' and prints one "Skip-Performance" line.
static char * Boyer_Moore_Flensburg(char *t, char *p, long n, int m)
{
    unsigned long  AdvanceHopperGrass=0; // Number of alignments tried

    long pos; // Current alignment; 'long' like n, an 'int' would overflow on haystacks bigger than INT_MAX
    int skipPercent;
    int i, j;
    signed int f[XSIZE+1]; //int f[m+1]; non-dynamical, here up to XSIZE long pattern.
    signed int s[XSIZE+1];
    signed int occ[ASIZE];

    // f[] and s[] hold m+1 entries: a longer pattern would write past them.
    if (m < 0 || m > XSIZE)
        return(NULL);

//0 Bad character table: rightmost position of every byte in the pattern, -1 if absent
    for (i=0; i<ASIZE; i++)
        occ[i]=-1;

    for (j=0; j<m; j++)
    {
        occ[(unsigned char)p[j]]=j;
    }

//1 Good suffix preprocessing case 1
    for (i=0; i<=m; i++)
    {
        f[i]=0;
        s[i]=0;
    }

    i=m, j=m+1;
    f[i]=j;
    while (i>0)
    {
        while (j<=m && p[i-1]!=p[j-1])
        {
            if (s[j]==0) s[j]=j-i;
            j=f[j];
        }
        i--; j--;
        f[i]=j;
    }

//2 Good suffix preprocessing case 2
    j=f[0];
    for (i=0; i<=m; i++)
    {
        if (s[i]==0) s[i]=j;
        if (i==j) j=f[j];
    }

//3 Searching
    pos=0;
    while (pos<=n-m)
    {
        j=m-1;
        while (j>=0 && p[j]==t[pos+j]) j--;
        if (j<0)
        {
            Railgunhits++; //return(t+pos); //report(pos);
            pos+=s[0];
        }
        else 
            // The text byte indexes occ[], so it must be taken as unsigned (same cast as when the table was built).
            pos+=MAX(s[j+1], j-occ[(unsigned char)t[pos+j]]);
        AdvanceHopperGrass++;
    }

    // No alignment was tried when n < m: report 0% instead of dividing by zero.
    skipPercent = AdvanceHopperGrass ? (int)((double)n/AdvanceHopperGrass*100) : 0;
    GlobalSP += skipPercent;
    GlobalI += AdvanceHopperGrass;
    // 'AdvanceHopperGrass' is 'unsigned long', so it needs %lu (it was printed with %d).
    printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, AdvanceHopperGrass);

    return(NULL);
}
// NON-BUGGY ]








// Purpose: This is optimized strstr-like (memmem in fact) function for short haystacks (up to several hundred bytes).
// Params:  pbTarget/cbTarget   - haystack and its length in bytes.
//          pbPattern/cbPattern - needle and its length in bytes.
// Return:  Pointer to the first occurrence of the needle inside the haystack, NULL when there is none.
// Notes:   - The degenerate needles are handled up front: an empty Pattern matches at pbTarget (memmem convention),
//            a 1 char Pattern is delegated to memchr. The fast loops below therefore always see cbPattern >= 2.
//          - The 32bit fragment (used when 'Kaze64bit' is not defined) assumes little-endian byte order for its byte masks.
#define Kaze64bit // When commented the 32bit fragment takes over.
char * Railgun_Doublet_Triplet_Quadruplet (char * pbTarget, char * pbPattern, uint32_t cbTarget, uint32_t cbPattern)
{
	if (cbPattern > cbTarget) return(NULL);
	if (cbPattern == 0) return(pbTarget);
	if (cbPattern == 1) return((char *)memchr(pbTarget, (unsigned char)pbPattern[0], cbTarget));

#ifdef Kaze64bit
	{
		// Compare the first two bytes as one 16bit value, then the rest of the needle.
		char * pbLastWindow = pbTarget + (cbTarget-cbPattern); // Start of the last window that still fits
		uint16_t usPatternHead;
		uint16_t usWindowHead;

		// memcpy instead of a pointer cast: no alignment or aliasing assumptions, compilers turn it into a plain load.
		memcpy(&usPatternHead, pbPattern, sizeof(usPatternHead));
		for ( ; pbTarget <= pbLastWindow; pbTarget++ ) {
			memcpy(&usWindowHead, pbTarget, sizeof(usWindowHead));
			if ( usWindowHead == usPatternHead && memcmp(pbTarget+2, pbPattern+2, cbPattern-2) == 0 )
				return(pbTarget);
		}
		return(NULL);
	}
#else				
	{
		char * pbTargetMax = pbTarget + cbTarget;
		uint32_t ulHashPattern;
		uint32_t ulHashTarget;
		uint32_t count;
		uint32_t AdvanceHopperGrass;
		uint32_t Quadruplet2nd;
		uint32_t Quadruplet3rd;
		uint32_t Quadruplet4th;
		unsigned char SINGLET = (unsigned char)pbPattern[0]; // First byte of the pattern

		// In all loops pbTarget points one past the current window.
		if ( cbPattern<4 ) {
			// 'Hash' is first byte + last byte, both unsigned so that bytes >= 0x80 do not disturb the first byte.
			pbTarget = pbTarget+cbPattern;
			ulHashPattern = ( (uint32_t)SINGLET<<8 ) + (unsigned char)pbPattern[cbPattern-1];
			if ( cbPattern==3 ) {
				for ( ;; ) {
					if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-3]<<8 ) + (unsigned char)pbTarget[-1] ) {
						if ( pbPattern[1] == pbTarget[-2] ) return((pbTarget-3));
					}
					// Skip the next one or two windows when their first byte cannot start a match.
					if ( SINGLET != (unsigned char)pbTarget[-2] ) { 
						pbTarget++;
						if ( SINGLET != (unsigned char)pbTarget[-2] ) pbTarget++;
					}
					pbTarget++;
					if (pbTarget > pbTargetMax) return(NULL);
				}
			}
			for ( ;; ) {
				if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-2]<<8 ) + (unsigned char)pbTarget[-1] ) return((pbTarget-2));
				if ( SINGLET != (unsigned char)pbTarget[-1] ) pbTarget++;
				pbTarget++;
				if (pbTarget > pbTargetMax) return(NULL);
			}
		} else {
			pbTarget = pbTarget+cbPattern;
			memcpy(&ulHashPattern, pbPattern, sizeof(ulHashPattern));
			Quadruplet2nd = (uint32_t)SINGLET<<8;
			Quadruplet3rd = (uint32_t)SINGLET<<16;
			Quadruplet4th = (uint32_t)SINGLET<<24;
			for ( ;; ) {
				AdvanceHopperGrass = 0;
				memcpy(&ulHashTarget, pbTarget-cbPattern, sizeof(ulHashTarget));
				if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
					count = cbPattern-1;
					while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
						// While the compared prefix contains no further copy of the first pattern byte the skip keeps growing.
						if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != (unsigned char)*(pbTarget-count) ) AdvanceHopperGrass++;
						count--;
					}
					if ( count == 0) return((pbTarget-cbPattern));
				} else { // The goal here: to avoid memory accesses by stressing the registers.
					// Bytes 2..4 of the window that differ from the first pattern byte cannot start a match.
					if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
						AdvanceHopperGrass++;
						if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
							AdvanceHopperGrass++;
							if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
						}
					}
				}
				AdvanceHopperGrass++;
				pbTarget = pbTarget + AdvanceHopperGrass;
				if (pbTarget > pbTargetMax) return(NULL);
			}
		}
	}
#endif
}

// Scheherezade -> Hasherezade
// Purpose: This is optimized strstr-like (memmem in fact) for short needles (up to 255 bytes) function.
// Version: This is Hasherezade r.2
// Params:  pbTarget/cbTarget   - haystack and its length in bytes.
//          pbPattern/cbPattern - needle and its length in bytes.
// Return:  Pointer to the first occurrence of the needle inside the haystack, NULL when there is none.
// Side effects: the long-haystack loops add one to the global 'GlobalI' per iteration.
// Notes:   - The degenerate needles are handled up front: an empty Pattern matches at pbTarget (memmem convention),
//            a 1 char Pattern is delegated to memchr.
//          - Strategy: needles of 2..3 bytes use a first+last byte probe; haystacks below HaystackThreshold use the
//            'Quadruplet' loop; otherwise needles up to NeedleThreshold use pure Sunday and longer ones use
//            order 2 Horspool + Sunday reinforced by a hash of the window's last 4 (or 8) bytes.
//          - The 4-byte and 2-byte probes are done through pointer casts, i.e. the platform has to tolerate unaligned loads;
//            the 2nd/3rd/4th byte masks of the 'Quadruplet' loop assume little-endian byte order.
#define ROL(x, n) (((x) << (n)) | ((x) >> (32-(n))))
//#define HasherezadeOrder8 // If commented then Order4
//#define HasherezadeBYTEWISE // If commented then BITWISE
#define HashTableSize 13
#ifdef HasherezadeBYTEWISE
	#define HashTableShrink 0 // Don't change!
#else				
	#define HashTableShrink 3 // Don't change!
#endif
#define HaystackThreshold 961 // Quadruplet works up to this value, if bigger then BMHS takes over
#define NeedleThreshold 9 // BMHS works up to this value, if bigger then BMHS reinforced by HASH order 4/8 takes over
#define JumpThreshold 1 // if jump/stride obtained with (BMHS + JumpThreshold) is smaller than HASH jump/stride than go hashing
char * Railgun_Hasherezade (char * pbTarget, char * pbPattern, uint32_t cbTarget, uint32_t cbPattern)
{
	char * pbTargetMax = pbTarget + cbTarget;
	uint32_t ulHashPattern;
	uint32_t ulHashTarget;
	signed long count;
	signed long countSTATIC;

	unsigned char SINGLET; // First byte of the pattern
	uint32_t Quadruplet2nd;
	uint32_t Quadruplet3rd;
	uint32_t Quadruplet4th;

	uint32_t AdvanceHopperGrass;

	long i; //BMH needed
	long lastStart; // Offset of the last window that still fits into the haystack
	uint32_t a, j;
	unsigned int bm_bc2nd[256]; // BMS needed
	unsigned char bm_Horspool_Order2[256*256]; // BMHSS(Elsiane) needed, 'char' limits patterns to 255, if 'long' then table becomes 256KB, grrr.
	unsigned char bm_Hasherezade_HASH[1<<(HashTableSize-HashTableShrink)];
	uint32_t hash32;
	uint32_t hash32B;
	uint32_t Gulliver; // or unsigned char or unsigned short

	if (cbPattern > cbTarget) return(NULL);
	if (cbPattern == 0) return(pbTarget);
	if (cbPattern == 1) return((char *)memchr(pbTarget, (unsigned char)pbPattern[0], cbTarget));

	SINGLET = (unsigned char)pbPattern[0];

	if ( cbPattern<4 ) { 
		// Needles of 2 or 3 bytes: the 'hash' is first byte + last byte, both taken as unsigned so that
		// bytes >= 0x80 do not borrow from the first byte (that used to corrupt the skip test below).
		// pbTarget points one past the current window.
		pbTarget = pbTarget+cbPattern;
		ulHashPattern = ( (uint32_t)SINGLET<<8 ) + (unsigned char)pbPattern[cbPattern-1];
		if ( cbPattern==3 ) {
			for ( ;; ) {
				if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-3]<<8 ) + (unsigned char)pbTarget[-1] ) {
					if ( pbPattern[1] == pbTarget[-2] ) return((pbTarget-3));
				}
				// Skip the next one or two windows when their first byte cannot start a match.
				if ( SINGLET != (unsigned char)pbTarget[-2] ) { 
					pbTarget++;
					if ( SINGLET != (unsigned char)pbTarget[-2] ) pbTarget++;
				}
				pbTarget++;
				if (pbTarget > pbTargetMax) return(NULL);
			}
		}
		for ( ;; ) {
			if ( ulHashPattern == ( (uint32_t)(unsigned char)pbTarget[-2]<<8 ) + (unsigned char)pbTarget[-1] ) return((pbTarget-2));
			if ( SINGLET != (unsigned char)pbTarget[-1] ) pbTarget++;
			pbTarget++;
			if (pbTarget > pbTargetMax) return(NULL);
		}
	}

	if (cbTarget<HaystackThreshold) { // This value is arbitrary (don't know how exactly), it ensures (at least must) better performance than 'Boyer_Moore_Horspool'.
		// Short haystack: compare the first four bytes as one 32bit value; pbTarget points one past the current window.
		pbTarget = pbTarget+cbPattern;
		ulHashPattern = *(uint32_t *)(pbPattern);
		Quadruplet2nd = (uint32_t)SINGLET<<8;
		Quadruplet3rd = (uint32_t)SINGLET<<16;
		Quadruplet4th = (uint32_t)SINGLET<<24;
		for ( ;; ) {
			AdvanceHopperGrass = 0;
			ulHashTarget = *(uint32_t *)(pbTarget-cbPattern);
			if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
				count = cbPattern-1;
				while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
					// While the compared prefix contains no further copy of the first pattern byte the skip keeps growing.
					if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != (unsigned char)*(pbTarget-count) ) AdvanceHopperGrass++;
					count--;
				}
				if ( count == 0) return((pbTarget-cbPattern));
			} else { // The goal here: to avoid memory accesses by stressing the registers.
				// Bytes 2..4 of the window that differ from the first pattern byte cannot start a match.
				if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
					AdvanceHopperGrass++;
					if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
						AdvanceHopperGrass++;
						if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
					}
				}
			}
			AdvanceHopperGrass++;
			pbTarget = pbTarget + AdvanceHopperGrass;
			if (pbTarget > pbTargetMax) return(NULL);
		}
	}

	// Long haystack from here on.
	countSTATIC = cbPattern-2-2;
	lastStart = (long)cbTarget - (long)cbPattern; // Signed, so that cbTarget==cbPattern does not wrap the loop bound

	// Sunday table; bytes are used as indexes, hence the 'unsigned char' cast (plain 'char' may be negative).
	for (a=0; a < 256; a++) bm_bc2nd[a]=cbPattern+1;
	for (j=0; j < cbPattern; j++) bm_bc2nd[(unsigned char)pbPattern[j]]=cbPattern-j; 

	ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes

	i=0;

	if ( cbPattern>NeedleThreshold ) { 

		// The order 2 Horspool table is needed by this branch only, so the 64KB fill is not paid for small needles.
		memset(bm_Horspool_Order2, (unsigned char)(cbPattern-1), sizeof(bm_Horspool_Order2)); // cbPattern-(Order-1) for Horspool
		for (j=0; j < cbPattern-1; j++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+j)]=(unsigned char)j; // Rightmost appearance/position is needed

		memset(bm_Hasherezade_HASH, 0, sizeof(bm_Hasherezade_HASH));
		// cbPattern - Order + 1 i.e. number of BBs for 11 'fastest fox' 11-8+1=4: 'fastest ', 'astest f', 'stest fo', 'test fox'
#ifdef HasherezadeOrder8
		for (j=0; j < cbPattern-8+1; j++) {
			hash32 = (2166136261 ^ *(uint32_t*)(pbPattern+j+0)) * 709607;
			hash32B = (2166136261 ^ *(uint32_t*)(pbPattern+j+4)) * 709607;
			hash32 = (hash32 ^ ROL(hash32B,5) ) * 709607;
#ifdef HasherezadeBYTEWISE
			bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]=1;
#else				
			hash32 = ( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 );
			bm_Hasherezade_HASH[hash32>>3]= bm_Hasherezade_HASH[hash32>>3] | (1<<(hash32&0x7));
#endif
#else
		for (j=0; j < cbPattern-4+1; j++) {
			hash32 = (2166136261 ^ *(uint16_t*)(pbPattern+j+0)) * 709607;
			hash32B = (2166136261 ^ *(uint16_t*)(pbPattern+j+2)) * 709607;
			hash32 = (hash32 ^ ROL(hash32B,5) ) * 709607;
#ifdef HasherezadeBYTEWISE
			bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]=1;
#else				
			hash32 = ( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 );
			bm_Hasherezade_HASH[hash32>>3]= bm_Hasherezade_HASH[hash32>>3] | (1<<(hash32&0x7));
#endif
#endif
		}

		while (i < lastStart) { // The last window is handled after the loop because Sunday reads one byte past the window
			Gulliver = bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]];

			if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found
				if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
					count = countSTATIC; // Last two chars already matched, to be fixed with -2
					while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
						count--;
					if ( count == 0) return(pbTarget+i);
				}
				Gulliver =  bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
			} else if ( Gulliver != cbPattern-1 ) // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2
				Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position

			// The goal: to jump when the rightmost 8bytes (Order 8 Horspool) of window do not look like any of Needle prefixes i.e. are not to be found. This maximum jump equals cbPattern-(Order-1) or 11-(8-1)=4 for 'fastest fox' - a small one but for Needle 31 bytes the jump equals 31-(8-1)=24
#ifdef HasherezadeOrder8
			if (Gulliver + (JumpThreshold) < cbPattern-(8-1)) { 
				hash32 = (2166136261 ^ *(uint32_t*)(pbTarget+i+cbPattern-8+0)) * 709607;
				hash32B = (2166136261 ^ *(uint32_t*)(pbTarget+i+cbPattern-8+4)) * 709607;
				hash32 = (hash32 ^ ROL(hash32B,5) ) * 709607;
#ifdef HasherezadeBYTEWISE
				if ( bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]==0 )
					Gulliver = cbPattern-(8-1);
#else
				hash32 = ( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 );
				if ( (bm_Hasherezade_HASH[hash32>>3] & (1<<(hash32&0x7))) ==0 )
					Gulliver = cbPattern-(8-1);
#endif
			}
#else
			if (Gulliver + (JumpThreshold) < cbPattern-(4-1)) { 
				hash32 = (2166136261 ^ *(uint16_t*)(pbTarget+i+cbPattern-4+0)) * 709607;
				hash32B = (2166136261 ^ *(uint16_t*)(pbTarget+i+cbPattern-4+2)) * 709607;
				hash32 = (hash32 ^ ROL(hash32B,5) ) * 709607;
#ifdef HasherezadeBYTEWISE
				if ( bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]==0 )
					Gulliver = cbPattern-(4-1);
#else
				hash32 = ( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 );
				if ( (bm_Hasherezade_HASH[hash32>>3] & (1<<(hash32&0x7))) ==0 )
					Gulliver = cbPattern-(4-1);
#endif
			}
#endif
			i = i + Gulliver;
			GlobalI++;
		}

	} else { //if ( cbPattern>NeedleThreshold )
		// Pure Sunday: the order 2 Horspool + 'SunHorse' loop was slower here (for small needles there is no much gain in choosing a bigger stride).
		while (i < lastStart) {
			// 32bit probe: an 'unsigned long' load is 8 bytes wide on LP64 and never equals the 32bit 'ulHashPattern'
			// unless the following four haystack bytes are zero.
			if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
				count = countSTATIC;
				while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 we have count=0 i.e. no need of comparing further chars.
					count--;
				}
				if ( count == 0) return(pbTarget+i);
			}
			i= i + bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
			GlobalI++;
		}
	} //if ( cbPattern>NeedleThreshold )

	if (i == lastStart) { // The very last window, no byte after it to feed the Sunday table
		if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
			count = countSTATIC;
			while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
				count--;
			if ( count == 0) return(pbTarget+i);
		}
		GlobalI++;
	}
	return(NULL);
}






/*
First off: I am heavily disappointed by the Speed-Performance of 'Hasherezade', and comparing the Skip-Performance of 'Gulliver' and 'Hasherezade' doesn't make me happy either.

Pattern: fastest fox
Doing Search for Pattern(11bytes) into String(206908949bytes) as-one-line ...

Railgun_Quadruplet_7sun performance:         1,683KB/clock / 00,775%, 26,672,940 skips/iterations
Railgun_Quadruplet_7 performance:            1,642KB/clock / 00,669%, 30,925,578 skips/iterations
Railgun_Quadruplet_7sunhorse performance:    0,944KB/clock / 00,912%, 22,663,583 skips/iterations
Railgun_Quadruplet_7deuce performance:       0,801KB/clock / 00,797%, 25,945,709 skips/iterations
Railgun_Quadruplet_7Elsiane performance:     0,704KB/clock / 00,931%, 22,213,101 skips/iterations
Railgun_Quadruplet_7Gulliver performance:    2,104KB/clock / 00,977%, 21,166,516 skips/iterations
Railgun_Quadruplet_7Hasherezade performance: 1,496KB/clock / 00,980%, 21,112,657 skips/iterations  18bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 1,642KB/clock / 00,980%, 21,112,646 skips/iterations  14bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 1,642KB/clock / 00,980%, 21,112,735 skips/iterations  10bit HashTable
Boyer_Moore_Flensburg performance:           1,057KB/clock / 00,669%, 30,925,578 skips/iterations

Pattern: and every day a continuous cleaning goes on
Doing Search for Pattern(43bytes) into String(206908949bytes) as-one-line ...

Railgun_Quadruplet_7sun performance:         2,557KB/clock / 01,495%, 13,832,201 skips/iterations
Railgun_Quadruplet_7 performance:            2,590KB/clock / 01,404%, 14,731,326 skips/iterations
Railgun_Quadruplet_7sunhorse performance:    1,888KB/clock / 02,222%, 09,308,136 skips/iterations
Railgun_Quadruplet_7deuce performance:       1,741KB/clock / 01,971%, 10,494,460 skips/iterations
Railgun_Quadruplet_7Elsiane performance:     1,642KB/clock / 02,229%, 09,279,871 skips/iterations
Railgun_Quadruplet_7Gulliver performance:    3,157KB/clock / 03,795%, 05,450,890 skips/iterations
Railgun_Quadruplet_7Hasherezade performance: 2,322KB/clock / 04,100%, 05,046,049 skips/iterations  18bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 2,971KB/clock / 04,100%, 05,046,445 skips/iterations  14bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 3,015KB/clock / 04,090%, 05,058,667 skips/iterations  10bit HashTable
Boyer_Moore_Flensburg performance:           1,683KB/clock / 01,447%, 14,294,963 skips/iterations

Pattern: And let this be your very fundamental insight... about everything. Just for one year, don't choose.
Doing Search for Pattern(99bytes) into String(206908949bytes) as-one-line ...

Railgun_Quadruplet_7sun performance:         2,845KB/clock / 02,248%, 09,200,917 skips/iterations
Railgun_Quadruplet_7 performance:            2,928KB/clock / 02,105%, 09,827,389 skips/iterations
Railgun_Quadruplet_7sunhorse performance:    2,270KB/clock / 03,283%, 06,302,096 skips/iterations
Railgun_Quadruplet_7deuce performance:       2,149KB/clock / 03,196%, 06,472,407 skips/iterations
Railgun_Quadruplet_7Elsiane performance:     2,172KB/clock / 03,282%, 06,303,154 skips/iterations
Railgun_Quadruplet_7Gulliver performance:    2,928KB/clock / 07,820%, 02,645,814 skips/iterations
Railgun_Quadruplet_7Hasherezade performance: 2,494KB/clock / 09,575%, 02,160,890 skips/iterations  18bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 3,015KB/clock / 09,568%, 02,162,493 skips/iterations  14bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 3,015KB/clock / 09,438%, 02,192,204 skips/iterations  10bit HashTable
Boyer_Moore_Flensburg performance:           2,349KB/clock / 02,135%, 09,689,008 skips/iterations

Pattern: Then, singing among the savage branches, it impales itself upon the longest, sharpest spine. And, dying, it rises above its own agony to outcarol the lark and the nightingale.
Doing Search for Pattern(175bytes) into String(206908949bytes) as-one-line ...

Railgun_Quadruplet_7sun performance:         2,658KB/clock / 03,142%, 06,583,682 skips/iterations
Railgun_Quadruplet_7 performance:            2,658KB/clock / 03,095%, 06,685,179 skips/iterations
Railgun_Quadruplet_7sunhorse performance:    2,377KB/clock / 04,776%, 04,331,865 skips/iterations
Railgun_Quadruplet_7deuce performance:       2,349KB/clock / 04,728%, 04,376,097 skips/iterations
Railgun_Quadruplet_7Elsiane performance:     2,525KB/clock / 04,778%, 04,330,200 skips/iterations
Railgun_Quadruplet_7Gulliver performance:    2,245KB/clock / 12,024%, 01,720,711 skips/iterations
Railgun_Quadruplet_7Hasherezade performance: 2,434KB/clock / 17,127%, 01,208,072 skips/iterations  18bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 2,590KB/clock / 17,084%, 01,211,064 skips/iterations  14bit HashTable
Railgun_Quadruplet_7Hasherezade performance: 2,590KB/clock / 16,379%, 01,263,238 skips/iterations  10bit HashTable
Boyer_Moore_Flensburg performance:           2,525KB/clock / 03,198%, 06,469,280 skips/iterations  Five times more main-cycles and faster than 'Hasherezade', pshaw!
*/

// Revision: 1, 2012-Feb-01, the main disadvantage: the preprocessing overhead PLUS a hasher.
//
// Railgun_Quadruplet_7Hasherezade_count_hits
//   Counts the occurrences of pbPattern (cbPattern bytes) inside pbTarget (cbTarget bytes).
//   Every hit increments the global 'Railgunhits'; the return value is always NULL.
//
// Three strategies are selected by size:
//   cbPattern < 4    - byte-wise scan driven by a 16-bit hash of the first and last pattern bytes
//                      (a plain byte scan for cbPattern == 1, nothing to do for cbPattern == 0).
//   cbTarget  < 961  - the first four pattern bytes are compared as one 32-bit word; on a mismatch the
//                      window is advanced past leading bytes that differ from the first pattern byte.
//   otherwise        - Order 2 Horspool table, SunHorse skips (the bigger of the Horspool and Sunday shifts)
//                      and, for cbPattern > 10, an Order 8 hash-presence table ('Hasherezade').
//                      Only this strategy updates GlobalSP/GlobalI and prints the skip statistics.
//
// Notes:
//   - Pattern/target bytes used as table indexes or hash inputs are always taken as 'unsigned char';
//     with a signed plain 'char' a byte >= 0x80 would otherwise index the tables negatively or corrupt the hash.
//   - The 4-byte loads use uint32_t (an 'unsigned long' is 8 bytes on LP64 targets). They are done through
//     pointer casts, i.e. unaligned reads must be tolerated, and SINGLET/Quadruplet* assume little-endian order.
//   - bm_Horspool_Order2 holds positions in 'unsigned char' entries, so the third strategy is limited to
//     patterns whose positions fit in 8 bits (the original author notes: patterns up to 255 bytes).
char * Railgun_Quadruplet_7Hasherezade_count_hits (char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
	char * pbTargetMax = pbTarget + cbTarget;
	uint32_t ulHashPattern;
	uint32_t ulHashTarget;
	signed long count;
	signed long countSTATIC;

	unsigned char SINGLET;
	uint32_t Quadruplet2nd;
	uint32_t Quadruplet3rd;
	uint32_t Quadruplet4th;

	unsigned long  AdvanceHopperGrass;

	long i; //BMH needed
	long iMax; // last window start that still fits into the target, kept signed so that it cannot wrap
	int a, j;
	unsigned int bm_bc[256]; //BMH needed
	unsigned int bm_bc2nd[256]; //BMS needed
	unsigned char bm_Horspool_Order2[256*256]; //BMHSS(Elsiane) needed, 'char' limits patterns to 255, if 'long' then table becomes 256KB, grrr.
	unsigned char bm_Hasherezade_HASH[1<<(HashTableSize)]; // Jesteressing 8bytes (Horspool Order 8) for fast lookup, should be bitwise (i.e. 8times smaller) since it says yes/no for presence.
	uint32_t hash32;
	unsigned long Gulliver; // or unsigned char or unsigned short

	if (cbPattern > cbTarget)
		return(NULL);

	if ( cbPattern<4) {
		if (cbPattern == 0)
			return(NULL); // nothing to search for

		if (cbPattern == 1) { // the two-byte loop below would read before the buffers, so scan byte by byte
			for ( ; pbTarget < pbTargetMax; pbTarget++) {
				if ( *pbTarget == *pbPattern ) Railgunhits++;
			}
			return(NULL);
		}

		// pbTarget points one past the current window; the hash is (first byte << 8) + last byte.
		pbTarget = pbTarget+cbPattern;
		ulHashPattern = ( (uint32_t)(*(unsigned char *)(pbPattern))<<8 ) + *(unsigned char *)(pbPattern+(cbPattern-1));

		if ( cbPattern==3) {
			for ( ;; ) {
				if ( ulHashPattern == ( (uint32_t)(*(unsigned char *)(pbTarget-3))<<8 ) + *(unsigned char *)(pbTarget-1) ) {
					if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
				}
				// If the next window does not start with the first pattern byte it cannot match: skip it too.
				if ( (unsigned char)(ulHashPattern>>8) != *(unsigned char *)(pbTarget-2) ) pbTarget++;
				pbTarget++;
				if (pbTarget > pbTargetMax)
					return(NULL);
			}
		}

		// cbPattern == 2
		for ( ;; ) {
			if ( ulHashPattern == ( (uint32_t)(*(unsigned char *)(pbTarget-2))<<8 ) + *(unsigned char *)(pbTarget-1) )
				Railgunhits++; //return((pbTarget-2));
			// Same one-byte lookahead as above.
			if ( (unsigned char)(ulHashPattern>>8) != *(unsigned char *)(pbTarget-1) ) pbTarget++;
			pbTarget++;
			if (pbTarget > pbTargetMax)
				return(NULL);
		}
	} else { //if ( cbPattern<4)
		if (cbTarget<961) { // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
			pbTarget = pbTarget+cbPattern;
			ulHashPattern = *(uint32_t *)(pbPattern);

			// First pattern byte, replicated at the 2nd/3rd/4th byte positions of a 32-bit word.
			SINGLET = ulHashPattern & 0xFF;
			Quadruplet2nd = (uint32_t)SINGLET<<8;
			Quadruplet3rd = (uint32_t)SINGLET<<16;
			Quadruplet4th = (uint32_t)SINGLET<<24;

			for ( ;; ) {
				AdvanceHopperGrass = 0;
				ulHashTarget = *(uint32_t *)(pbTarget-cbPattern);

				if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
					count = cbPattern-1;
					while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
						// While the bytes right after the window start differ from the first pattern byte, no match can start there.
						if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(unsigned char *)(pbTarget-count) ) AdvanceHopperGrass++;
						count--;
					}
					if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
				} else { // The goal here: to avoid memory accesses by stressing the registers.
					if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
						AdvanceHopperGrass++;
						if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
							AdvanceHopperGrass++;
							if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
						}
					}
				}

				AdvanceHopperGrass++;

				pbTarget = pbTarget + AdvanceHopperGrass;
				if (pbTarget > pbTargetMax)
					return(NULL);
			}
		} else { //if (cbTarget<961)
			countSTATIC = cbPattern-2-2; // bytes left to verify after the leading four

			// Order 1 skip tables: bm_bc is Horspool (keyed by the last window byte), bm_bc2nd is Sunday (keyed by the byte after the window).
			for (a=0; a < 256; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=cbPattern+1;}
			for (j=0; j < cbPattern-1; j++) bm_bc[(unsigned char)pbPattern[j]]=cbPattern-j-1;
			for (j=0; j < cbPattern; j++) bm_bc2nd[(unsigned char)pbPattern[j]]=cbPattern-j;

			ulHashPattern = *(uint32_t *)(pbPattern); // First four bytes

			AdvanceHopperGrass = 0;
			i=0;
			iMax = (long)(cbTarget-cbPattern);

			// Elsiane r.2  [
			// cbPattern-(Order-1) for Horspool: the value cbPattern-1 marks 'pair not present in the pattern'.
			memset(bm_Horspool_Order2, (int)(cbPattern-1), sizeof bm_Horspool_Order2);

			// alfalfa 7 long 6 BBs (al lf fa al lf fa) 3 distinct BBs (al lf fa)
			// fast 4 0-1-2 fa as st
			for (j=0; j < cbPattern-1; j++) bm_Horspool_Order2[*(unsigned short *)(pbPattern+j)]=j; // Rightmost appearance/position is needed
			// Elsiane r.2  ]

			if ( cbPattern>10) {
				// Hasherezade r.1 [
				// OSHO.TXT has 00,046,486 03bytes distinct BBs
				// OSHO.TXT has 00,248,019 04bytes distinct BBs
				// OSHO.TXT has 00,855,682 05bytes distinct BBs
				// OSHO.TXT has 02,236,138 06bytes distinct BBs
				// OSHO.TXT has 04,803,152 07bytes distinct BBs
				// OSHO.TXT has 08,956,496 08bytes distinct BBs to be hashed in 18bit i.e. 256KB i.e. 262,144 slots i.e. 34 vs 1.
				// OSHO.TXT has 15,006,172 09bytes distinct BBs
				// OSHO.TXT has 22,992,127 10bytes distinct BBs
				// Note: BB stands for Building-Block (also suffix)

				memset(bm_Hasherezade_HASH, 0, sizeof bm_Hasherezade_HASH); // to-do: bit to replace byte
				// cbPattern - Order + 1 i.e. number of BBs for 11 'fastest fox' 11-8+1=4: 'fastest ', 'astest f', 'stest fo', 'test fox'
				for (j=0; j < cbPattern-8+1; j++) {
					hash32 = (2166136261 ^ (ROL(*(uint32_t *)(pbPattern+j),5)^*(uint32_t *)(pbPattern+j+4))) * 709607;
					bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]=1;
				}
				// Hasherezade r.1 ]

				while (i <= iMax-1) { // -1 because Sunday is used (it reads the byte right after the window)
					Gulliver = bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]];

					if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found
						if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
							count = countSTATIC;
							while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
								count--;
							if ( count == 0) Railgunhits++; //return(pbTarget+i);
						}
						// 'SunHorse': take the bigger of the Horspool and the Sunday shift.
						if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]] )
							Gulliver =  bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
						else
							Gulliver =  bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
					} else if ( Gulliver != cbPattern-1 ) // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2
						Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position

					// The goal: to jump when the rightmost 8bytes (Order 8 Horspool) of window do not look like any of Needle prefixes i.e. are not to be found. This maximum jump equals cbPattern-(Order-1) or 11-(8-1)=4 for 'fastest fox' - a small one but for Needle 31 bytes the jump equals 31-(8-1)=24
					if (Gulliver < cbPattern-(8-1)) {
						hash32 = (2166136261 ^ (ROL(*(uint32_t *)(pbTarget+i+cbPattern-8),5)^*(uint32_t *)(pbTarget+i+cbPattern-8+4))) * 709607;
						if ( bm_Hasherezade_HASH[( hash32 ^ (hash32 >> 16) ) & ( (1<<(HashTableSize))-1 )]==0 )
							Gulliver = cbPattern-(8-1);
					}
					i = i + Gulliver;
					AdvanceHopperGrass++;
				}
			} else {
				// Same loop without the Order 8 hash step (the pattern is too short for it to pay off).
				while (i <= iMax-1) { // -1 because Sunday is used (it reads the byte right after the window)
					Gulliver = bm_Horspool_Order2[*(unsigned short *)&pbTarget[i+cbPattern-1-1]];

					if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found
						if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
							count = countSTATIC;
							while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
								count--;
							if ( count == 0) Railgunhits++; //return(pbTarget+i);
						}
						// 'SunHorse': take the bigger of the Horspool and the Sunday shift.
						if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]] )
							Gulliver =  bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
						else
							Gulliver =  bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
					} else if ( Gulliver != cbPattern-1 ) // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2
						Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position

					i = i + Gulliver;

// 32323218 Order 1 Horspool Skip-table A
// 01234568 Order 1 Horspool Skip-table B
// fa af fa af fa as st Order 2 Horspool Skip-table B
//  0  1  2  3  4  5  6
// HIKARIfast
// fafafast
//   fafafast +2 Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-5-1 = 2) Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-4-2 = 2) Order 2 'fa' vs 'st' i.e. CASE #3

// 76543218 Order 1 Horspool
// lo on ng gp pa ac ce Order 2 Horspool
//  0  1  2  3  4  5  6
// HIKARIfast
// longpace
//   longpace +2 Order 1 'a' vs 'e'
//        longpace +7 = (cbPattern-(Order-1) = 8-(2-1) = 7) Order 2 'fa' vs 'ce' i.e. CASE #2

					AdvanceHopperGrass++;
				}
			} //if ( cbPattern>10) {

			// The very last window has no byte after it for Sunday, so it is verified separately.
			if (i == iMax) {
				if ( *(uint32_t *)&pbTarget[i] == ulHashPattern) {
					count = countSTATIC;
					while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) )
						count--;
					if ( count == 0) Railgunhits++; //return(pbTarget+i);
				}
				AdvanceHopperGrass++;
			}

			GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
			GlobalI += AdvanceHopperGrass;
			printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

			return(NULL);
		} //if (cbTarget<961)
	} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]











// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm [
// Railgun_Quadruplet_7Elsiane_count_hits
//   Counts the occurrences of pbPattern (cbPattern bytes) inside pbTarget (cbTarget bytes).
//   Every hit increments the global 'Railgunhits'; the return value is always NULL.
//
// Three strategies are selected by size:
//   cbPattern < 4    - byte-wise scan driven by a 16-bit hash of the first and last pattern bytes
//                      (a plain byte scan for cbPattern == 1, nothing to do for cbPattern == 0).
//   cbTarget  < 961  - the first four pattern bytes are compared as one 32-bit word; on a mismatch the
//                      window is advanced past leading bytes that differ from the first pattern byte.
//   otherwise        - Order 2 Sunday presence table plus SunHorse skips (the bigger of the Horspool and
//                      Sunday shifts). Only this strategy updates GlobalSP/GlobalI and prints the statistics.
//
// Notes:
//   - Pattern/target bytes used as table indexes or hash inputs are always taken as 'unsigned char';
//     with a signed plain 'char' a byte >= 0x80 would otherwise index the tables negatively or corrupt the hash.
//   - The 4-byte loads use uint32_t (an 'unsigned long' is 8 bytes on LP64 targets). They are done through
//     pointer casts, i.e. unaligned reads must be tolerated, and SINGLET/Quadruplet* assume little-endian order.
char * Railgun_Quadruplet_7Elsiane_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    uint32_t ulHashPattern;
    uint32_t ulHashTarget;
    signed long count;
    signed long countSTATIC;

    unsigned char SINGLET;
    uint32_t Quadruplet2nd;
    uint32_t Quadruplet3rd;
    uint32_t Quadruplet4th;

    unsigned long  AdvanceHopperGrass;

    long i; //BMH needed
    long iMax; // last window start that still fits into the target, kept signed so that it cannot wrap
    int a, j;
    unsigned int patByte;
    unsigned int bm_bc[ASIZE]; //BMH needed
    unsigned int bm_bc2nd[ASIZE]; //BMS needed
    unsigned char bm_Sunday_Order2[ASIZE*ASIZE]; //BMHSS(Elsiane) needed, of course those 65536 bytes are scary in next revision reduce them to 8192 i.e. bitwise

    if (cbPattern > cbTarget)
        return(NULL);

    if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment is needed because lengths below 4 cannot use the 4-byte compare.
        if (cbPattern == 0)
            return(NULL); // nothing to search for

        if (cbPattern == 1) { // the two-byte loop below would read before the buffers, so scan byte by byte
            for ( ; pbTarget < pbTargetMax; pbTarget++) {
                if ( *pbTarget == *pbPattern ) Railgunhits++;
            }
            return(NULL);
        }

        // pbTarget points one past the current window; the hash is (first byte << 8) + last byte.
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = ( (uint32_t)(*(unsigned char *)(pbPattern))<<8 ) + *(unsigned char *)(pbPattern+(cbPattern-1));

        if ( cbPattern==3) {
            for ( ;; )
            {
                if ( ulHashPattern == ( (uint32_t)(*(unsigned char *)(pbTarget-3))<<8 ) + *(unsigned char *)(pbTarget-1) ) {
                    if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
                }
                // If the next window does not start with the first pattern byte it cannot match: skip it too.
                if ( (unsigned char)(ulHashPattern>>8) != *(unsigned char *)(pbTarget-2) ) pbTarget++;
                pbTarget++;
                if (pbTarget > pbTargetMax)
                    return(NULL);
            }
        }

        // cbPattern == 2
        for ( ;; )
        {
            if ( ulHashPattern == ( (uint32_t)(*(unsigned char *)(pbTarget-2))<<8 ) + *(unsigned char *)(pbTarget-1) )
                Railgunhits++; //return((pbTarget-2));
            // Same one-byte lookahead as above.
            if ( (unsigned char)(ulHashPattern>>8) != *(unsigned char *)(pbTarget-1) ) pbTarget++;

            pbTarget++;
            if (pbTarget > pbTargetMax)
                return(NULL);
        }
    } else { //if ( cbPattern<4)
        if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
        {
            pbTarget = pbTarget+cbPattern;
            ulHashPattern = *(uint32_t *)(pbPattern);

            // First pattern byte, replicated at the 2nd/3rd/4th byte positions of a 32-bit word.
            SINGLET = ulHashPattern & 0xFF;
            Quadruplet2nd = (uint32_t)SINGLET<<8;
            Quadruplet3rd = (uint32_t)SINGLET<<16;
            Quadruplet4th = (uint32_t)SINGLET<<24;

            for ( ;; )
            {
                AdvanceHopperGrass = 0;
                ulHashTarget = *(uint32_t *)(pbTarget-cbPattern);

                if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
                    count = cbPattern-1;
                    while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
                        // While the bytes right after the window start differ from the first pattern byte, no match can start there.
                        if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(unsigned char *)(pbTarget-count) ) AdvanceHopperGrass++;
                        count--;
                    }
                    if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
                } else { // The goal here: to avoid memory accesses by stressing the registers.
                    if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
                        AdvanceHopperGrass++;
                        if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
                            AdvanceHopperGrass++;
                            if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
                        }
                    }
                }

                AdvanceHopperGrass++;

                pbTarget = pbTarget + AdvanceHopperGrass;
                if (pbTarget > pbTargetMax)
                    return(NULL);
            }
        } else { //if (cbTarget<961)
            countSTATIC = cbPattern-2-2; // r.7: bytes left to verify after the leading four (0 for a 4-byte pattern)

            /* Preprocessing */
            // Order 1 skip tables: bm_bc is Horspool (keyed by the last window byte), bm_bc2nd is Sunday (keyed by the byte after the window).
            for (a=0; a < ASIZE; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=cbPattern+1;}
            for (j=0; j < cbPattern-1; j++) bm_bc[(unsigned char)pbPattern[j]]=cbPattern-j-1;
            for (j=0; j < cbPattern; j++) bm_bc2nd[(unsigned char)pbPattern[j]]=cbPattern-j;

// Elsiane [
            memset(bm_Sunday_Order2, 0, sizeof bm_Sunday_Order2);

//The idea here is to use Order 2 of Sunday's approach:
//It takes all rows and columns containing a char from the pattern to be marked (as 1 i.e. active)
//   000 001 002 003 ...   A ... 255
//000                      * 
//001                      *
//002                      *
//003                      *
//...                      *
//  A  *   *   *   *   *   *   *   *  
//...                      *
//255                      *
            for (j=0; j < cbPattern; j++) {
                patByte = (unsigned char)pbPattern[j];
                for (a=0; a < ASIZE; a++) {
                    bm_Sunday_Order2[(a<<8) + patByte]=1;  // columns filling
                    bm_Sunday_Order2[(patByte<<8) + a]=1;  // rows filling
                }
            }
// Elsiane ]

            ulHashPattern = *(uint32_t *)(pbPattern);

            AdvanceHopperGrass = 0;
            i=0;
            iMax = (long)(cbTarget-cbPattern);

            // Main loop: needs the two bytes following the window, hence it stops two positions before the last window.
            while (i <= iMax-2) {
                if ( *(uint32_t *)&pbTarget[i] == ulHashPattern)
                {
                    count = countSTATIC;
                    while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) {
                        count--;
                    }
                    if ( count == 0) Railgunhits++; //return(pbTarget+i);
                }

                if ( bm_Sunday_Order2[*(unsigned short *)&pbTarget[i+cbPattern]]==0 )
                    i=i+cbPattern+1+1; // neither of the two bytes after the window occurs in the pattern: jump over both
                else
                {
                    // 'SunHorse': take the bigger of the Horspool and the Sunday shift.
                    if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]] )
                        i= i + bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
                    else
                        i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
                }

                AdvanceHopperGrass++;
            }

            // The (at most two) remaining windows lack the lookahead bytes, so they are verified one by one.
            for ( ; i <= iMax; i++) {
                if ( *(uint32_t *)&pbTarget[i] == ulHashPattern)
                {
                    count = countSTATIC;
                    while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) {
                        count--;
                    }
                    if ( count == 0) Railgunhits++; //return(pbTarget+i);
                }
                AdvanceHopperGrass++;
            }

            GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
            GlobalI += AdvanceHopperGrass;
            printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

            return(NULL);
        } //if (cbTarget<961)
    } //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]




// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_7sunhorse_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    register unsigned long ulHashPattern;
    register unsigned long ulHashTarget;
    //unsigned long count; //r.6+
    signed long count;
    //unsigned long countSTATIC; //r.6+
    signed long countSTATIC;
//  unsigned long countRemainder;

/*
    const unsigned char SINGLET = *(char *)(pbPattern);
    const unsigned long Quadruplet2nd = SINGLET<<8;
    const unsigned long Quadruplet3rd = SINGLET<<16;
    const unsigned long Quadruplet4th = SINGLET<<24;
*/
    unsigned char SINGLET;
    unsigned long Quadruplet2nd;
    unsigned long Quadruplet3rd;
    unsigned long Quadruplet4th;

    unsigned long  AdvanceHopperGrass;

    long i; //BMH needed
    int a, j;
    unsigned int bm_bc[ASIZE]; //BMH needed
    //signed int bm_bc2nd[ASIZE]; //BMS needed
    unsigned int bm_bc2nd[ASIZE]; //BMS needed

    unsigned char ch; //BMH needed
    unsigned long chchchch; //BMH needed
    unsigned short chchchchSHORT; //BMH needed
    //unsigned char * chPTR;
//    unsigned char lastch, firstch; //BMH needed

    if (cbPattern > cbTarget)
        return(NULL);

// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
//        countSTATIC = cbPattern-2;

if ( cbPattern==3) {
    for ( ;; )
    {
        if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
         if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else {
}
    for ( ;; )
    {
        // The line below gives for 'cbPattern'>=1:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
        // Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
        if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
            return((long)(pbTarget-cbPattern));
*/

        // The fragment below gives for 'cbPattern'>=2:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
        // Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock

/*
//For 2 and 3 [
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
//         count = countSTATIC;
         count = cbPattern-2;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
         while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/


        if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
            Railgunhits++; //return((pbTarget-2));
        if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;


        // The fragment below gives for 'cbPattern'>=2:
	// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
	// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
         count = countSTATIC>>2;
         countRemainder = countSTATIC % 4;

         while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
               count--;
         }
	 //if (count == 0) {  // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
         while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
               countRemainder--;
         }
         //if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
         if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
         //}
        }
*/

        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = *(unsigned long *)(pbPattern);
//        countSTATIC = cbPattern-1;

    //SINGLET = *(char *)(pbPattern);
    SINGLET = ulHashPattern & 0xFF;
    Quadruplet2nd = SINGLET<<8;
    Quadruplet3rd = SINGLET<<16;
    Quadruplet4th = SINGLET<<24;

    for ( ;; )
    {
	AdvanceHopperGrass = 0;
	ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);

        if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
//         count = countSTATIC;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
//	       if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
//               count--;
//         }
         count = cbPattern-1;
         while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
	       if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
               count--;
         }
         if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
        } else { // The goal here: to avoid memory accesses by stressing the registers.
    if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
         AdvanceHopperGrass++;
         if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
              AdvanceHopperGrass++;
              if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
         }
    }
	}

	AdvanceHopperGrass++;

	pbTarget = pbTarget + AdvanceHopperGrass;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if (cbTarget<961)
        //countSTATIC = cbPattern-2; //r.6+
        //countSTATIC = cbPattern-2-3;
        //countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfittable in code cache lines?!
        //countSTATIC = cbPattern-2-3; // r.7- At last no recompared bytes in-between chars
        countSTATIC = cbPattern-2-2; // r.7 


    /* Preprocessing */
//    for (a=0; a < ASIZE; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=-1;}
    for (a=0; a < ASIZE; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=cbPattern+1;}
    for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1; 
//    for (j=0; j < cbPattern; j++) bm_bc2nd[pbPattern[j]]=j; 
    for (j=0; j < cbPattern; j++) bm_bc2nd[pbPattern[j]]=cbPattern-j; 

        ulHashPattern = *(unsigned long *)(pbPattern);

    AdvanceHopperGrass = 0;
    i=0;
    while (i <= cbTarget-cbPattern-1) {
         ulHashTarget=*(unsigned long *)&pbTarget[i];
         if ( ulHashTarget == ulHashPattern)
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }

//         i+=cbPattern;
//         i= i - bm_bc2nd[(unsigned char)pbTarget[i]];

//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < ((cbPattern) - bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]]) )
//         i= i + ((cbPattern) - bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]]);
if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]] )
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+(cbPattern)]];
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];


         AdvanceHopperGrass++;
    }
    if (i == cbTarget-cbPattern) {
         ulHashTarget=*(unsigned long *)&pbTarget[i];
         if ( ulHashTarget == ulHashPattern)
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }
         AdvanceHopperGrass++;
    }
    

GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
GlobalI += AdvanceHopperGrass;
printf("Skip-Performance(bigger-the-better): %d%%, %d skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

    return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]




// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday algorithm [
/*
 * Railgun_Quadruplet_7sun_count_hits
 *
 * Counts the occurrences of pbPattern (cbPattern bytes) inside pbTarget
 * (cbTarget bytes). Every occurrence found increments the global
 * 'Railgunhits'; the function itself always returns NULL.
 *
 * Three strategies are used, selected by the lengths:
 *   - cbPattern 1..3 : direct byte comparison. For lengths 2 and 3 the window
 *                      advances by two positions whenever the byte that would
 *                      start the next window is not the pattern's first byte.
 *   - cbPattern >= 4 and cbTarget < 961 : compare the first four bytes, then
 *                      the rest; the advance is derived from where the
 *                      pattern's first byte appears inside the window.
 *                      (961 is the original author's empirically chosen,
 *                      admittedly arbitrary, threshold.)
 *   - otherwise      : Sunday-style search - the byte just past the window
 *                      selects the shift. This branch also adds to GlobalSP and
 *                      GlobalI and prints one "Skip-Performance" line.
 *
 * Parameters:
 *   pbTarget  - text to search; only read.
 *   pbPattern - pattern to look for; only read.
 *   cbTarget  - number of bytes in pbTarget.
 *   cbPattern - number of bytes in pbPattern. 0, or anything longer than
 *               cbTarget, counts nothing.
 *
 * Returns: NULL on every path (hits are reported through 'Railgunhits').
 *
 * Changes relative to the earlier revision of this function:
 *   - All bytes are handled as unsigned char. Previously the skip table was
 *     indexed with a plain char (negative index for bytes >= 0x80) and the
 *     skip heuristics compared signed against unsigned bytes, which could
 *     skip over real hits for non-ASCII patterns.
 *   - The first four bytes are compared with memcmp instead of an
 *     'unsigned long' load, which is 8 bytes wide on LP64 systems and read
 *     past short patterns and past the last window.
 *   - The Sunday loop bound no longer wraps when cbTarget == cbPattern.
 *   - The iteration count is printed with %lu (it is an unsigned long).
 *   - cbPattern == 0 returns immediately and cbPattern == 1 is counted
 *     directly; both used to read before the start of the target.
 *
 * Benchmark notes kept from the original author (KB/clock, measured on the
 * earlier revision, 206908949-byte text; 7sun / 7deuce / 7):
 *   'fast'            1046 /  567 /  976   (7sun skip-performance 456%)
 *   'faster'          1338 /  658 / 1320   (591%)
 *   'lesser'          1413 /  789 / 1383   (619%)
 *   'beautiful'       1836 /  953 / 1924   (862%)
 *   'beautiful dress' 2000 / 1030 / 2041   (967%)
 */

/* Nonzero when the cbPattern bytes at 'window' equal 'pattern'.
   Requires cbPattern >= 4: the leading quadruplet is tested first so that
   most non-matching windows are rejected by one cheap 4-byte comparison. */
static int Railgun_7sun_window_matches (const unsigned char * window,
     const unsigned char * pattern,
     unsigned long cbPattern)
{
    return memcmp(window, pattern, 4) == 0
        && memcmp(window + 4, pattern + 4, cbPattern - 4) == 0;
}

char * Railgun_Quadruplet_7sun_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    const unsigned char * tgt = (const unsigned char *)pbTarget;
    const unsigned char * pat = (const unsigned char *)pbPattern;
    unsigned char first;   /* first byte of the pattern */
    unsigned long end;     /* index one past the current window (offsets, so no pointer ever leaves the buffer) */

    if (cbPattern > cbTarget || cbPattern == 0)
        return(NULL);

    first = pat[0];

    if (cbPattern < 4) {
        const unsigned char last = pat[cbPattern - 1];

        if (cbPattern == 1) {
            unsigned long pos;
            for (pos = 0; pos < cbTarget; pos++) {
                if (tgt[pos] == first) Railgunhits++;
            }
            return(NULL);
        }

        end = cbPattern;

        if (cbPattern == 3) {
            const unsigned char middle = pat[1];
            do {
                /* Outer bytes first (the former 2-byte hash), then the middle one. */
                if (tgt[end - 3] == first && tgt[end - 1] == last && tgt[end - 2] == middle)
                    Railgunhits++;
                /* No hit can start at the middle byte unless it equals 'first'. */
                end += (tgt[end - 2] != first) ? 2 : 1;
            } while (end <= cbTarget);
            return(NULL);
        }

        /* cbPattern == 2 */
        do {
            if (tgt[end - 2] == first && tgt[end - 1] == last)
                Railgunhits++;
            /* No hit can start at the window's second byte unless it equals 'first'. */
            end += (tgt[end - 1] != first) ? 2 : 1;
        } while (end <= cbTarget);
        return(NULL);
    }

    if (cbTarget < 961) { /* Threshold is arbitrary; meant to beat Boyer-Moore-Horspool on short targets. */
        end = cbPattern;
        do {
            const unsigned char * window = tgt + (end - cbPattern);
            unsigned long skip = 0; /* extra positions that provably cannot start a hit */

            if (memcmp(window, pat, 4) == 0) {
                /* Bytes 1..3 are compared again on purpose: the walk also
                   counts how many leading positions differ from 'first'. */
                unsigned long k = 1;
                while (k < cbPattern && window[k] == pat[k]) {
                    if (skip == k - 1 && window[k] != first) skip++;
                    k++;
                }
                if (k == cbPattern) Railgunhits++;
            } else if (window[1] != first) {
                skip++;
                if (window[2] != first) {
                    skip++;
                    if (window[3] != first) skip++;
                }
            }

            end += skip + 1;
        } while (end <= cbTarget);
        return(NULL);
    }

    {
        unsigned long shift[ASIZE];                          /* Sunday shift per byte value */
        const unsigned long lastStart = cbTarget - cbPattern; /* start of the final window */
        unsigned long iterations = 0;
        unsigned long i = 0;
        unsigned long j;
        int skipPercent;

        /* Preprocessing: a byte absent from the pattern lets the window jump
           completely past it; otherwise align it with its last occurrence. */
        for (j = 0; j < ASIZE; j++) shift[j] = cbPattern + 1;
        for (j = 0; j < cbPattern; j++) shift[pat[j]] = cbPattern - j;

        /* Every window except the last has a byte right after it to look up. */
        while (i < lastStart) {
            if (Railgun_7sun_window_matches(tgt + i, pat, cbPattern)) Railgunhits++;
            i += shift[tgt[i + cbPattern]];
            iterations++;
        }
        /* The final window has no following byte, so it is tested separately. */
        if (i == lastStart) {
            if (Railgun_7sun_window_matches(tgt + i, pat, cbPattern)) Railgunhits++;
            iterations++;
        }

        /* iterations >= 1 here: either the loop ran or the final window was tested. */
        skipPercent = (int)((double)cbTarget/iterations*100);
        GlobalSP += skipPercent;
        GlobalI += iterations;
        printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, iterations);

        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday algorithm ]



















// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_7deuce_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    char * pbTargetMax = pbTarget + cbTarget;
    register unsigned long ulHashPattern;
    register unsigned long ulHashTarget;
    //unsigned long count; //r.6+
    signed long count;
    //unsigned long countSTATIC; //r.6+
    signed long countSTATIC;
//  unsigned long countRemainder;

/*
    const unsigned char SINGLET = *(char *)(pbPattern);
    const unsigned long Quadruplet2nd = SINGLET<<8;
    const unsigned long Quadruplet3rd = SINGLET<<16;
    const unsigned long Quadruplet4th = SINGLET<<24;
*/
    unsigned char SINGLET;
    unsigned long Quadruplet2nd;
    unsigned long Quadruplet3rd;
    unsigned long Quadruplet4th;

    unsigned long  AdvanceHopperGrass;

    long i; //BMH needed
    int a, j, bm_bc[ASIZE], bm_bc2nd[ASIZE]; //BMH needed
    unsigned char ch; //BMH needed
    unsigned long chchchch; //BMH needed
    unsigned short chchchchSHORT; //BMH needed
    //unsigned char * chPTR;
//    unsigned char lastch, firstch; //BMH needed

    if (cbPattern > cbTarget)
        return(NULL);

// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
//        countSTATIC = cbPattern-2;

if ( cbPattern==3) {
    for ( ;; )
    {
        if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
         if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else {
}
    for ( ;; )
    {
        // The line below gives for 'cbPattern'>=1:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
        // Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
        if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
            return((long)(pbTarget-cbPattern));
*/

        // The fragment below gives for 'cbPattern'>=2:
        // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
        // Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock

/*
//For 2 and 3 [
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
//         count = countSTATIC;
         count = cbPattern-2;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
         while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
               count--;
         }
         if ( count == 0) return((pbTarget-cbPattern));
        }
        if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/


        if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
            Railgunhits++; //return((pbTarget-2));
        if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;


        // The fragment below gives for 'cbPattern'>=2:
	// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
	// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
        if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
         count = countSTATIC>>2;
         countRemainder = countSTATIC % 4;

         while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
               count--;
         }
	 //if (count == 0) {  // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
         while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
               countRemainder--;
         }
         //if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
         if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
         //}
        }
*/

        pbTarget++;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
        pbTarget = pbTarget+cbPattern;
        ulHashPattern = *(unsigned long *)(pbPattern);
//        countSTATIC = cbPattern-1;

    //SINGLET = *(char *)(pbPattern);
    SINGLET = ulHashPattern & 0xFF;
    Quadruplet2nd = SINGLET<<8;
    Quadruplet3rd = SINGLET<<16;
    Quadruplet4th = SINGLET<<24;

    for ( ;; )
    {
	AdvanceHopperGrass = 0;
	ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);

        if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
//         count = countSTATIC;
//         while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
//	       if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
//               count--;
//         }
         count = cbPattern-1;
         while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
	       if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
               count--;
         }
         if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
        } else { // The goal here: to avoid memory accesses by stressing the registers.
    if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
         AdvanceHopperGrass++;
         if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
              AdvanceHopperGrass++;
              if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
         }
    }
	}

	AdvanceHopperGrass++;

	pbTarget = pbTarget + AdvanceHopperGrass;
        if (pbTarget > pbTargetMax)
            return(NULL);
    }
} else { //if (cbTarget<961)
        //countSTATIC = cbPattern-2; //r.6+
        //countSTATIC = cbPattern-2-3;
        //countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfittable in code cache lines?!
        //countSTATIC = cbPattern-2-3; // r.7- At last no recompared bytes in-between chars
        countSTATIC = cbPattern-2-2; // r.7 

        //chPTR=(unsigned char *)&chchchch+3;
// Next line fixes the BUG from r.6++: but with awful speed degradation! So the bug is fixed in the definitions by setting 'countSTATIC = cbPattern-2-2;', bug appears only for patterns with lengths of 4, The setback is one unnecessary comparison for 5bytes patterns, stupidly such setback exists (from before) for 4bytes as well.
//if (countSTATIC<0) countSTATIC=0;
    /* Preprocessing */
    for (a=0; a < ASIZE; a++) {bm_bc[a]=cbPattern; bm_bc2nd[a]=cbPattern-3+(2);}
//    for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1; // 'ABCD' ABC?  A/D jump with 3/4
//    for (j=cbPattern-1; j > 0; j--) bm_bc[pbPattern[j]]=j;             // 'ABCD' ?BCD  A/D jump with 4/3


    for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1; 
    for (j=0; j < (cbPattern-3+(2))-1; j++) bm_bc2nd[pbPattern[j]]=(cbPattern-3+(2))-j-1; 



//        ulHashPattern = *(unsigned long *)(pbPattern+cbPattern-1-3);
        ulHashPattern = *(unsigned long *)(pbPattern);

    AdvanceHopperGrass = 0;
    i=0;
    while (i <= cbTarget-cbPattern) {
       //ch=pbTarget[i+cbPattern-1];
       //ch=pbTarget[i];
          //if ( pbTarget[i] == pbPattern[0] && *(unsigned long *)&pbTarget[i+cbPattern-1-3] == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
//          if ( *(unsigned long *)&pbTarget[i] == ulHashPattern ) // The lesson I learned from r.7- now applied in r.7: instead of extracting 'ch' having higher address now the lower address is extracted first in order (hopefully, the test confirms it) the next 32bytes (including 'ch') to be cached i.e. to comparison part is faster.
//ulHashTarget=*(unsigned long *)&pbTarget[i+cbPattern-1-3];
ulHashTarget=*(unsigned long *)&pbTarget[i];
//ch = (unsigned char)(ulHashTarget>>24);
//ch = (unsigned char)pbTarget[i+3];
//ch = (unsigned char)bm_bc2nd[(unsigned char)pbTarget[i+3]];
//ch = (unsigned char)bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

         if ( ulHashTarget == ulHashPattern)
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ [
Input Pattern(up to 19+2000 chars): lesser

[
if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] )
//if ( pbTarget[i+cbPattern-1] == pbPattern[cbPattern-1-3+(2)] )
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

Skip-Performance(bigger-the-better): 580%, 35625791 skips/iterations
Bozan_hits/Bozan_clocks: 229/261
Bozan performance: 774KB/clock

if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] )
{
//if ( pbTarget[i+cbPattern-1] == pbPattern[cbPattern-1-3+(2)] )
printf("%d,%d\n",bm_bc[(unsigned char)pbTarget[i+cbPattern-1]], bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]]);
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
}
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

//      ?%
//  lesser
// 65432166
// 65122166

//  lesse
// 5432155
// 5431135

// 1,3 %<? e<e 
// 2,3 %<? s<e 
// 1,4 %<? e<l 
// 2,4 %<? s<l 
// 1,5 %<? e<Z(any non-pattern) 
// 2,5 %<? s<Z(any non-pattern) 

35625791 vs 36285892 iterations means 0,660,101 less cycles!
35625791 vs 38047818 iterations means 2,422,027 less cycles!
]

[
//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] )
if ( pbTarget[i+cbPattern-1] == pbPattern[cbPattern-1-3+(2)] )
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

Skip-Performance(bigger-the-better): 570%, 36285892 skips/iterations
Bozan_hits/Bozan_clocks: 229/208
Bozan performance: 971KB/clock
]

[
//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] )
//if ( pbTarget[i+cbPattern-1] == pbPattern[cbPattern-1-3+(2)] )
//         i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
//else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

Skip-Performance(bigger-the-better): 543%, 38047818 skips/iterations
Bozan_hits/Bozan_clocks: 229/148
Bozan performance: 1365KB/clock
]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ]
*/


// Variant B
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1130KB/clock
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1124KB/clock
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1131KB/clock
// Variant A
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1121KB/clock
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1115KB/clock
// Railgun_Quadruplet_7deuce 6x2 i.e. average performance: 1121KB/clock

//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] ) // Variant A
if ( bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] > bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] ) // Variant B
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

AdvanceHopperGrass++;

    }

GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
GlobalI += AdvanceHopperGrass;
printf("Skip-Performance(bigger-the-better): %d%%, %d skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);


// Revision 7deuce vs 7, a short showdown

// Just wanted to see how a two barrel gun is gonna fire, (in fact how a search for two (choosing the bigger skip every time) patterns performs):
// Note1: Two skip tables are in use, the two patterns are: the original and the original one char(the-most-right) short. For example fast/fas, faster/faste, beautiful/beautifu as follows:

// r.7:
// Doing Search for Pattern(4bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): fast
// Performance(bigger-the-better): 377%, 54788054 skips
// Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: 5945/220
// Railgun_Quadruplet_7 performance: 918KB/clock

// r.7deuce:
// Doing Search for Pattern(4bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): fast
// Performance(bigger-the-better): 389%, 53138919 skips
// Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: 5945/363
// Railgun_Quadruplet_7deuce performance: 556KB/clock

// r.7:
// Doing Search for Pattern(6bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): faster
// Performance(bigger-the-better): 514%, 40194194 skips
// Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: 744/164
// Railgun_Quadruplet_7 performance: 1232KB/clock

// r.7deuce:
// Doing Search for Pattern(6bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): faster
// Performance(bigger-the-better): 567%, 36434006 skips
// Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: 744/314
// Railgun_Quadruplet_7deuce performance: 643KB/clock

// r.7:
// Doing Search for Pattern(9bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): beautiful
// Performance(bigger-the-better): 790%, 26174714 skips
// Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: 27958/114
// Railgun_Quadruplet_7 performance: 1772KB/clock

// r.7deuce:
// Doing Search for Pattern(9bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): beautiful
// Performance(bigger-the-better): 857%, 24117314 skips
// Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: 27958/216
// Railgun_Quadruplet_7deuce performance: 935KB/clock

// r.7:
// Doing Search for Pattern(15bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): beautiful dress
// Performance(bigger-the-better): 869%, 23787030 skips
// Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: 11/107
// Railgun_Quadruplet_7 performance: 1888KB/clock

// r.7deuce:
// Doing Search for Pattern(15bytes) into String(206908949bytes) as-one-line ...
// Input Pattern(up to 19+2000 chars): beautiful dress
// Performance(bigger-the-better): 1093%, 18928081 skips
// Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: 11/198
// Railgun_Quadruplet_7deuce performance: 1020KB/clock

// Note2: A lower number of skips means bigger strides; traversing the distance effectively requires two things: making bigger strides and making them faster.
// Note3: Revision 7deuce is markedly slower than r.7 but more robust (the impact of bad scenarios is lower); a Pentium Merom at 2166MHz was used.
// Note4: The firing test shows that adding a single if-else fragment degrades the speed almost twofold.

// ; 3217 : if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]] )
// 
//   011ab	8b 54 24 24	 mov	 edx, DWORD PTR tv689[esp+2104]
//   011af	0f b6 0c 02	 movzx	 ecx, BYTE PTR [edx+eax]
//   011b3	8b 54 24 28	 mov	 edx, DWORD PTR tv698[esp+2104]
//   011b7	0f b6 14 02	 movzx	 edx, BYTE PTR [edx+eax]
//   011bb	8b 4c 8c 30	 mov	 ecx, DWORD PTR _bm_bc2nd$[esp+ecx*4+2104]
//   011bf	8b 94 94 30 04
// 	00 00		 mov	 edx, DWORD PTR _bm_bc$[esp+edx*4+2104]
//   011c6	3b d1		 cmp	 edx, ecx
//   011c8	7d 04		 jge	 SHORT $LN2@Railgun_Qu@5
// 
// ; 3218 :          i= i + bm_bc2nd[(unsigned char)pbTarget[i+cbPattern-1-3+(2)]];
// 
//   011ca	03 c1		 add	 eax, ecx
// 
// ; 3219 : else
// 
//   011cc	eb 02		 jmp	 SHORT $LN1@Railgun_Qu@5
// $LN2@Railgun_Qu@5:
// 
// ; 3220 :          i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
// 
//   011ce	03 c2		 add	 eax, edx
// $LN1@Railgun_Qu@5:

// Note5: I wonder how i7 will handle these (so many) memory accesses, in other words whether these ugly speed-performance ratios 1232/643=1.91:1 ('faster' pattern) and 1772/935=1.89:1 ('beautiful' pattern) will remain.










/*
        ulHashPattern = *(unsigned long *)(pbPattern);

    i=0;
    while (i <= cbTarget-cbPattern) {
       //ch=pbTarget[i+cbPattern-1];
       //ch=pbTarget[i];
          //if ( pbTarget[i] == pbPattern[0] && *(unsigned long *)&pbTarget[i+cbPattern-1-3] == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
//          if ( *(unsigned long *)&pbTarget[i] == ulHashPattern ) // The lesson I learned from r.7- now applied in r.7: instead of extracting 'ch' having higher address now the lower address is extracted first in order (hopefully, the test confirms it) the next 32bytes (including 'ch') to be cached i.e. to comparison part is faster.
ulHashTarget=*(unsigned long *)&pbTarget[i];
//ch = (unsigned char)(ulHashTarget>>24);
//ch = (unsigned char)pbTarget[i+3];
//ch = (unsigned char)bm_bc2nd[(unsigned char)pbTarget[i+3]];
//ch = (unsigned char)bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

         if ( ulHashTarget == ulHashPattern)
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }

//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)(ulHashTarget>>24)] ) // 1383KB 'beautiful'
//if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < bm_bc2nd[(unsigned char)pbTarget[i+3]] ) // 1413KB 'beautiful'
if ( bm_bc[(unsigned char)pbTarget[i+cbPattern-1]] < 4 ) // 1433KB 'beautiful'
         i= i + bm_bc2nd[(unsigned char)pbTarget[i+3]];
else
         i= i + bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];

    }
*/











/*
    for (j=cbPattern-1; j > 0; j--) bm_bc[pbPattern[j]]=j;             
ch = (unsigned char)bm_bc[pbPattern[0]];

    for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
    for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1; 

if (ch > (unsigned char)bm_bc[pbPattern[cbPattern-1]]) {

    for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
    for (j=cbPattern-1; j > 0; j--) bm_bc[pbPattern[j]]=j; // Restore backward state

        ulHashPattern = *(unsigned long *)(pbPattern);

// Backward [
    i=(cbTarget-cbPattern);
    while (i >= 0) {
       //ch=pbTarget[i+cbPattern-1];
//       ulHashTarget=*(unsigned long *)&pbTarget[i+cbPattern-1-3];
       ulHashTarget=*(unsigned long *)&pbTarget[i];
          if (ulHashTarget == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)+4) == *(char *)(&pbTarget[i]+(countSTATIC-count)+4) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }
       //i+=bm_bc[*chPTR]; // Twice as slow than the below line!
//       i+=bm_bc[(unsigned char)pbTarget[i+cbPattern-1]];
         i-=bm_bc[(unsigned char)pbTarget[i]];
//         i-=bm_bc[(unsigned char)(ulHashTarget & 0xFF)]; // ?! superslow ?!
//         i-=bm_bc[*(unsigned char *)&ulHashTarget]; // ?! superslow ?!
    }
// Backward ]

} else {

    //for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
    //for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1; // Restore forward state

        ulHashPattern = *(unsigned long *)(pbPattern+cbPattern-1-3);

// Forward [
    //lastch=pbPattern[cbPattern-1];
    //firstch=pbPattern[0];
    i=0;
    while (i <= cbTarget-cbPattern) {
       //ch=pbTarget[i+cbPattern-1];
//       ulHashTarget=*(unsigned long *)&pbTarget[i+cbPattern-1-3];
       ulHashTarget=*(unsigned long *)&pbTarget[i+cbPattern-1-3];
          if (ulHashTarget == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
             {
         count = countSTATIC;
         while ( count !=0 && *(char *)(pbPattern+(countSTATIC-count)) == *(char *)(&pbTarget[i]+(countSTATIC-count)) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
               count--;
         }
         if ( count == 0) Railgunhits++; //return(pbTarget+i);
	     }
       //i+=bm_bc[*chPTR]; // Twice as slow than the below line!
       i+=bm_bc[(unsigned char)(ulHashTarget>>24)];
    }
// Forward ]

}
*/

    return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// Revision: 2, 2012-Feb-17, the main disadvantage: the preprocessing overhead.
//
// Railgun_Quadruplet_7Trident_count_hits
//   Counts the occurrences of pbPattern (cbPattern bytes) inside pbTarget (cbTarget bytes) by
//   incrementing the global 'Railgunhits' once per occurrence. The return value is always NULL
//   (the 'return first hit' statements of the search variant are kept as trailing comments).
//
//   Dispatch on the lengths:
//     cbPattern > cbTarget, cbPattern == 0 : nothing is counted.
//     cbPattern == 1                       : plain byte scan.
//     cbPattern == 2 or 3                  : first/last byte window test with a first-byte skip.
//     cbPattern >= 4, cbTarget < 961
//       or cbPattern > 256                 : 'Quadruplet' scan (compare the first four bytes, skip 1..4).
//     otherwise                            : Order-2 Horspool table + Sunday/Horspool skip + a three-step
//                                            BNDM probe (patterns up to 32 bytes). This path also adds to
//                                            GlobalSP/GlobalI and prints a 'Skip-Performance' line.
//
//   All bytes are handled as unsigned values and multi-byte loads are assembled byte by byte, so the
//   routine does not depend on sizeof(unsigned long), on alignment, or on the signedness of 'char'.

// Assembles four consecutive bytes into one 32-bit word (lowest address in the lowest 8 bits).
static uint32_t Trident_Load32 (const uint8_t * p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

// Assembles two consecutive bytes into an index (0..65535) for the order-2 table.
static unsigned int Trident_Load16 (const uint8_t * p)
{
	return (unsigned int)p[0] | ((unsigned int)p[1] << 8);
}

char * Railgun_Quadruplet_7Trident_count_hits (char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
	const uint8_t * tgt = (const uint8_t *)pbTarget;
	const uint8_t * pat = (const uint8_t *)pbPattern;

	uint32_t ulHashPattern; // first four pattern bytes
	uint32_t ulHashTarget;  // first four bytes of the current window
	unsigned long count;

	uint8_t SINGLET;        // first pattern byte
	uint32_t Quadruplet2nd; // SINGLET positioned over byte 1 of a 32-bit window
	uint32_t Quadruplet3rd; // ... over byte 2
	uint32_t Quadruplet4th; // ... over byte 3

	unsigned long AdvanceHopperGrass; // skip accumulator (Quadruplet scan) / iteration counter (long path)

	unsigned long i;           // window start (long path) / scan index
	unsigned long j;
	unsigned long cbWindowEnd; // index one past the current window (short and Quadruplet paths)
	unsigned long ulLastStart; // last valid window start, cbTarget-cbPattern
	unsigned int a;
	unsigned int bm_bc[256];    //BMH needed
	unsigned int bm_bc2nd[256]; //BMS needed
	unsigned char bm_Horspool_Order2[256*256]; //BMHSS(Elsiane) needed, 'char' limits patterns to 256, if 'long' then table becomes 256KB, grrr.
	unsigned long Gulliver;

	// Trident i.e. BNDM reinforced
	uint32_t mask, maskv[256] = {0};

	if (cbPattern > cbTarget)
		return(NULL);
	if (cbPattern == 0)
		return(NULL);

	if (cbPattern == 1) {
		// The paths below always look at two or more window bytes, so a single byte gets its own scan.
		for (i = 0; i < cbTarget; i++) {
			if ( tgt[i] == pat[0] ) Railgunhits++;
		}
		return(NULL);
	}

	if ( cbPattern<4) {
		const uint8_t chFirst = pat[0];
		const uint8_t chLast = pat[cbPattern-1];

		cbWindowEnd = cbPattern;

		if ( cbPattern==3) {
			for ( ;; ) {
				if ( tgt[cbWindowEnd-3] == chFirst && tgt[cbWindowEnd-1] == chLast ) {
					if ( pat[1] == tgt[cbWindowEnd-2] ) Railgunhits++; //return((pbTarget-3));
				}
				// No occurrence can start on a byte that differs from the first pattern byte:
				// test the middle byte and then (r8Triplet tweak) the last byte of the window.
				if ( chFirst != tgt[cbWindowEnd-2] ) {
					cbWindowEnd++;
					if ( chFirst != tgt[cbWindowEnd-2] ) cbWindowEnd++; // r8Triplet
				}
				// Effect of the r8Triplet tweak, String(206908949bytes) as-one-line,
				// r.1 = without the tweak, r.2 = with the tweak:
				//
				//   Pattern  hits      r.1 clocks  r.1 KB/clock  r.2 clocks  r.2 KB/clock
				//   TDK      0         220         918           210         962
				//   the      2114180   332         608           316         639
				//   pun      3474      234         863           224         902
				//   zoo      316       210         962           202         1000
				//   inn      35383     285         708           280         721
				//   hey      226644    275         734           263         768
				//   dot      1385      253         798           248         814
				//   fum      697       226         894           220         918
				cbWindowEnd++;
				if (cbWindowEnd > cbTarget)
					return(NULL);
			}
		}

		// cbPattern == 2
		for ( ;; ) {
			if ( tgt[cbWindowEnd-2] == chFirst && tgt[cbWindowEnd-1] == chLast )
				Railgunhits++; //return((pbTarget-2));
			if ( chFirst != tgt[cbWindowEnd-1] ) cbWindowEnd++;
			cbWindowEnd++;
			if (cbWindowEnd > cbTarget)
				return(NULL);
		}
	}

	// The order-2 table below stores positions in one byte each, which is only exact for patterns
	// of up to 256 bytes; longer patterns take the Quadruplet scan, which has no length limit.
	if (cbTarget<961 || cbPattern>256) { // 961 is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
		// An earlier variant of this branch used the rolling 32-bit window of
		// 'A better strstr, with no asm code' by Mischa Sandberg (http://mischasan.wordpress.com).
		cbWindowEnd = cbPattern;
		ulHashPattern = Trident_Load32(pat);

		SINGLET = pat[0];
		Quadruplet2nd = (uint32_t)SINGLET<<8;
		Quadruplet3rd = (uint32_t)SINGLET<<16;
		Quadruplet4th = (uint32_t)SINGLET<<24;

		for ( ;; ) {
			AdvanceHopperGrass = 0;
			ulHashTarget = Trident_Load32(tgt + (cbWindowEnd-cbPattern));

			if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
				count = cbPattern-1;
				while ( count && pat[cbPattern-count] == tgt[cbWindowEnd-count] ) {
					// While the matched bytes right after the window start differ from the first
					// pattern byte, no occurrence can start on them: extend the skip by one each.
					if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != tgt[cbWindowEnd-count] ) AdvanceHopperGrass++;
					count--;
				}
				if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
			} else { // The goal here: to avoid memory accesses by stressing the registers.
				if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00u) ) {
					AdvanceHopperGrass++;
					if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000u) ) {
						AdvanceHopperGrass++;
						if ( Quadruplet4th != (ulHashTarget & 0xFF000000u) ) AdvanceHopperGrass++;
					}
				}
			}

			AdvanceHopperGrass++;

			cbWindowEnd = cbWindowEnd + AdvanceHopperGrass;
			if (cbWindowEnd > cbTarget)
				return(NULL);
		}
	}

	// From here on: 4 <= cbPattern <= 256 and cbTarget >= 961.

	// Order 1 skip tables: bm_bc is Horspool (keyed by the last window byte),
	// bm_bc2nd is Sunday (keyed by the byte right after the window).
	for (a=0; a < 256; a++) {bm_bc[a]=(unsigned int)cbPattern; bm_bc2nd[a]=(unsigned int)(cbPattern+1);}
	for (j=0; j < cbPattern-1; j++) bm_bc[pat[j]]=(unsigned int)(cbPattern-j-1);
	for (j=0; j < cbPattern; j++) bm_bc2nd[pat[j]]=(unsigned int)(cbPattern-j);

	ulHashPattern = Trident_Load32(pat); // First four bytes

	AdvanceHopperGrass = 0;
	i=0;

	// Elsiane r.2  [
	// Default cbPattern-1 means 'this byte pair does not occur in the pattern'.
	for (a=0; a < 256*256; a++) {bm_Horspool_Order2[a]=(unsigned char)(cbPattern-1);} // cbPattern-(Order-1) for Horspool; 'memset' if not optimized

	// alfalfa 7 long 6 BBs (al lf fa al lf fa) 3 distinct BBs (al lf fa)
	// fast 4 0-1-2 fa as st
	for (j=0; j < cbPattern-1; j++) bm_Horspool_Order2[Trident_Load16(pat+j)]=(unsigned char)j; // Rightmost appearance/position is needed
	// Elsiane r.2 ]

	if ( cbPattern<=32 ) {
		// BNDM state masks: bit (cbPattern-1-j) of maskv[c] is set when pat[j] == c.
		for (j = 0; j < cbPattern; ++j)
			maskv[pat[j]] |= (uint32_t)1 << (cbPattern - 1 - j);
	}

	ulLastStart = cbTarget-cbPattern;

	while (i < ulLastStart) { // the last window is excluded here because Sunday reads the byte after the window
		Gulliver = bm_Horspool_Order2[Trident_Load16(tgt + i + cbPattern-1-1)];

		if ( Gulliver == cbPattern-2 ) { // CASE #1: means the pair (char order 2) is found
			if ( Trident_Load32(tgt + i) == ulHashPattern) {
				j = 4; // the first four bytes are already matched
				while ( j < cbPattern && pat[j] == tgt[i+j] )
					j++;
				if ( j == cbPattern) Railgunhits++; //return(pbTarget+i);
			}
			//i = i + 1; // r.1, obviously this is the worst skip so turning to 'SunHorse': lines below
			if ( bm_bc[tgt[i+cbPattern-1]] < bm_bc2nd[tgt[i+cbPattern]] )
				Gulliver = bm_bc2nd[tgt[i+cbPattern]];
			else
				Gulliver = bm_bc[tgt[i+cbPattern-1]];
		} else if ( Gulliver != cbPattern-1 ) { // CASE #2: if equal means the pair (char order 2) is not found i.e. Gulliver remains intact, skip the whole pattern and fall back (Order-1) chars i.e. one char for Order 2
			Gulliver = cbPattern - Gulliver - 2; // CASE #3: the pair is found and not as suffix i.e. rightmost position
			if ( cbPattern<=32 ) {
				// BNDM should not dig further to the left: Order 2 (presence + position) vs Order 3 (presence check only)
				// Unrolling three times, should be optimized since a register houses all chars up to Order 4
				mask = maskv[tgt[i + (cbPattern-0) - 1]];
				mask = (mask << 1) & maskv[tgt[i + (cbPattern-1) - 1]];
				mask = (mask << 1) & maskv[tgt[i + (cbPattern-2) - 1]];
				// A non-zero mask means a matching triad exists at some position: keep Order 2's offset.
				// A zero mask means no such triad exists: skip the whole pattern and fall back
				// (Order-1) chars i.e. two chars for Order 3.
				if (!mask)
					Gulliver = cbPattern-2;
			}
		}
		i = i + Gulliver;

// 32323218 Order 1 Horspool Skip-table A
// 01234568 Order 1 Horspool Skip-table B
// fa af fa af fa as st Order 2 Horspool Skip-table B
//  0  1  2  3  4  5  6
// HIKARIfast
// fafafast
//   fafafast +2 Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-5-1 = 2) Order 1 'a' vs 't'
//   fafafast +2 = (cbPattern-SkipB-Order = 8-4-2 = 2) Order 2 'fa' vs 'st' i.e. CASE #3

// 76543218 Order 1 Horspool
// lo on ng gp pa ac ce Order 2 Horspool
//  0  1  2  3  4  5  6
// HIKARIfast
// longpace
//   longpace +2 Order 1 'a' vs 'e'
//        longpace +7 = (cbPattern-(Order-1) = 8-(2-1) = 7) Order 2 'fa' vs 'ce' i.e. CASE #2

		AdvanceHopperGrass++;
	}

	// The very last window has no byte after it, so it is verified here without any skip lookup.
	if (i == ulLastStart) {
		if ( Trident_Load32(tgt + i) == ulHashPattern) {
			j = 4;
			while ( j < cbPattern && pat[j] == tgt[i+j] )
				j++;
			if ( j == cbPattern) Railgunhits++; //return(pbTarget+i);
		}
		AdvanceHopperGrass++;
	}

	GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
	GlobalI += AdvanceHopperGrass;
	printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

	return(NULL);
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Sunday-Horspool algorithm ]













/*
// DAWG "Directed Acyclic Word Graph" Definition (1): A directed acyclic graph representing the suffixes of a given string in which each edge is labeled with a character. The characters along a path from the root to a node are the substring which the node represents.
// DAWG "Directed Acyclic Word Graph" Definition (2): A finite state machine that recognizes a set of words. 
// BNDM ("Backward Nondeterministic DAWG Matching")
// Mischa Sandberg
char *bndm(char *target, int tgtlen, char *pattern, int patlen)
{
    uint8_t    *tgt = (uint8_t*)target, *pat = (uint8_t*)pattern;
    int        i, j;
    uint32_t   mask, maskv[256] = {0};
 
    for (i = 0; i < patlen; ++i)
        maskv[pat[i]] |= 1 << (patlen - 1 - i);
 
    for (i = 0; i < tgtlen - patlen; i += j) {
        mask = maskv[tgt[patlen - 1 + i]];
        for (j = patlen; mask;) {
            if (!--j) return target + i;
            mask = (mask << 1) & maskv[tgt[i + j - 1]];
        }
    }
    return NULL;
}
*/
// BNDM_32: returns a pointer to the first occurrence of pbPattern (cbPattern bytes) inside
// pbTarget (cbTarget bytes), or NULL when there is none or when the pattern is longer than the target.
// An empty pattern (cbPattern == 0) matches at pbTarget, the same convention strstr uses.
//
// The matcher is the BNDM variant quoted above: the window is scanned right to left while a 32-bit
// state word keeps one bit per pattern position that is still compatible with the bytes read so far.
// Only 32 positions fit into the state word, so for longer patterns BNDM runs on the first 32 pattern
// bytes and the remaining bytes are verified with memcmp.
char * BNDM_32(char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
    const uint8_t *tgt = (const uint8_t*)pbTarget;
    const uint8_t *pat = (const uint8_t*)pbPattern;
    const unsigned long window = (cbPattern > 32) ? 32 : cbPattern; // pattern bytes tracked by the state word
    unsigned long i, j;
    uint32_t   mask, maskv[256] = {0};

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);

    // Bit (window-1-i) of maskv[c] is set when pat[i] == c; the unsigned constant keeps the
    // shift well defined when it reaches bit 31.
    for (i = 0; i < window; ++i)
        maskv[pat[i]] |= (uint32_t)1 << (window - 1 - i);

    i=0;
    while (i <= cbTarget-cbPattern) {
          mask = maskv[tgt[i + window - 1]];
          j = window;
          while (mask) {
                if (!--j) {
                      // All 'window' bytes matched; for long patterns the tail decides.
                      if (window == cbPattern || memcmp(tgt + i + window, pat + window, cbPattern - window) == 0)
                            return pbTarget + i;
                      j = 1; // tail mismatch: nothing is known beyond this position, advance by one
                      break;
                }
                mask = (mask << 1) & maskv[tgt[i + j - 1]];
          }
          // The bytes from tgt[i+j-1] to the end of the window do not occur anywhere in the tracked
          // pattern part, so no occurrence can start before i+j.
          i= i + j;
    }
    return NULL;
}

// BNDM_32_count_hits: counting twin of BNDM_32. Every occurrence of pbPattern (cbPattern bytes) inside
// pbTarget (cbTarget bytes) increments the global 'Railgunhits'; after a hit the window advances by one.
// The number of window iterations is added to GlobalI, the skip percentage (cbTarget per iteration,
// times 100) is added to GlobalSP and printed. The return value is always NULL.
//
// The 32-bit state word tracks at most 32 pattern positions: for 1 <= cbPattern <= 32 this is plain
// BNDM; for longer patterns BNDM runs on the first 32 bytes and memcmp verifies the rest.
// Nothing is counted or printed when cbPattern is 0 or larger than cbTarget.
char * BNDM_32_count_hits(char * pbTarget, char * pbPattern, unsigned long cbTarget, unsigned long cbPattern)
{
    const uint8_t *tgt = (const uint8_t*)pbTarget;
    const uint8_t *pat = (const uint8_t*)pbPattern;
    const unsigned long window = (cbPattern > 32) ? 32 : cbPattern; // pattern bytes tracked by the state word
    unsigned long i, j;
    uint32_t   mask, maskv[256] = {0};

    unsigned long AdvanceHopperGrass = 0;

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(NULL);

    // Bit (window-1-i) of maskv[c] is set when pat[i] == c; the unsigned constant keeps the
    // shift well defined when it reaches bit 31.
    for (i = 0; i < window; ++i)
        maskv[pat[i]] |= (uint32_t)1 << (window - 1 - i);

/*
State masks and measurements; each table row shows maskv[pat[i]] right after step i of the loop above
(the tables for the 20 and 32 bytes long patterns follow the same scheme and are not repeated here).

Pattern: alfalfa
Doing Search for Pattern(7bytes) into String(206908949bytes) as-one-line ...

                                    alf,alfa
00 a 0000,0000,0000,0000,0000,0000,0100,0000
01 l 0000,0000,0000,0000,0000,0000,0010,0000
02 f 0000,0000,0000,0000,0000,0000,0001,0000
03 a 0000,0000,0000,0000,0000,0000,0100,1000
04 l 0000,0000,0000,0000,0000,0000,0010,0100
05 f 0000,0000,0000,0000,0000,0000,0001,0010
06 a 0000,0000,0000,0000,0000,0000,0100,1001

BNDM_32:                         1870KB/clock / 0691%, 29,938392 iterations
Railgun_Quadruplet_7sun:         1642KB/clock / 0748%, 27,654192 iterations
Railgun_Quadruplet_7:            1642KB/clock / 0662%, 31,226521 iterations
Railgun_Quadruplet_7sunhorse:    1202KB/clock / 0786%, 26,295205 iterations
Railgun_Quadruplet_7Elsiane:     1063KB/clock / 0871%, 23,744531 iterations
Railgun_Quadruplet_7Gulliver:    1496KB/clock / 0598%, 34,583650 iterations
Railgun_Quadruplet_7Hasherezade: 1507KB/clock / 0598%, 34,583650 iterations

Pattern: underrated
Doing Search for Pattern(10bytes) into String(206908949bytes) as-one-line ...

                                un,derr,ated
00 u 0000,0000,0000,0000,0000,0010,0000,0000
01 n 0000,0000,0000,0000,0000,0001,0000,0000
02 d 0000,0000,0000,0000,0000,0000,1000,0000
03 e 0000,0000,0000,0000,0000,0000,0100,0000
04 r 0000,0000,0000,0000,0000,0000,0010,0000
05 r 0000,0000,0000,0000,0000,0000,0011,0000
06 a 0000,0000,0000,0000,0000,0000,0000,1000
07 t 0000,0000,0000,0000,0000,0000,0000,0100
08 e 0000,0000,0000,0000,0000,0000,0100,0010
09 d 0000,0000,0000,0000,0000,0000,1000,0001

BNDM_32:                         1287KB/clock / 0965%, 21,438744 iterations
Railgun_Quadruplet_7sun:         1942KB/clock / 0894%, 23,127481 iterations
Railgun_Quadruplet_7:            1942KB/clock / 0804%, 25,703793 iterations
Railgun_Quadruplet_7sunhorse:    1141KB/clock / 1025%, 20,167543 iterations
Railgun_Quadruplet_7Elsiane:     0768KB/clock / 1072%, 19,285870 iterations
Railgun_Quadruplet_7Gulliver:    1906KB/clock / 0885%, 23,374120 iterations
Railgun_Quadruplet_7Hasherezade: 1870KB/clock / 0885%, 23,374120 iterations

Pattern: bye-bye-bye-bye-bye-
Doing Search for Pattern(20bytes) into String(206908949bytes) as-one-line ...

BNDM_32:                         3483KB/clock / 1988%, 10,402957 iterations
Railgun_Quadruplet_7sun:         3207KB/clock / 1891%, 10,940613 iterations
Railgun_Quadruplet_7:            3259KB/clock / 1806%, 11,450773 iterations
Railgun_Quadruplet_7sunhorse:    2557KB/clock / 2069%, 09,995656 iterations
Railgun_Quadruplet_7Elsiane:     2104KB/clock / 2149%, 09,627265 iterations
Railgun_Quadruplet_7Gulliver:    3259KB/clock / 1898%, 10,898305 iterations
Railgun_Quadruplet_7Hasherezade: 3157KB/clock / 1899%, 10,892762 iterations

Pattern: fastest fox with biggest strides
Doing Search for Pattern(32bytes) into String(206908949bytes) as-one-line ...

BNDM_32:                         2886KB/clock / 3113%, 06,644708 iterations
Railgun_Quadruplet_7sun:         2694KB/clock / 1584%, 13,060463 iterations
Railgun_Quadruplet_7:            2767KB/clock / 1511%, 13,689243 iterations
Railgun_Quadruplet_7sunhorse:    1820KB/clock / 2138%, 09,677267 iterations
Railgun_Quadruplet_7Elsiane:     1566KB/clock / 2143%, 09,652548 iterations
Railgun_Quadruplet_7Gulliver:    3108KB/clock / 2924%, 07,074287 iterations
Railgun_Quadruplet_7Hasherezade: 2971KB/clock / 3041%, 06,801754 iterations
*/

    i=0;
    while (i <= cbTarget-cbPattern) {
          mask = maskv[tgt[i + window - 1]];
          j = window;
          while (mask) {
                if (j==1) {
                      // All 'window' bytes matched; for long patterns the tail decides.
                      if (window == cbPattern || memcmp(tgt + i + window, pat + window, cbPattern - window) == 0)
                            Railgunhits++; //return pbTarget + i;
                      mask = 0; // leave the scan with j == 1 i.e. advance by one
                }
                else {j--; mask = (mask << 1) & maskv[tgt[i + j - 1]];}
          }
          // The bytes from tgt[i+j-1] to the end of the window do not occur anywhere in the tracked
          // pattern part, so no occurrence can start before i+j.
          i= i + j;

          AdvanceHopperGrass++;
    }

    GlobalSP += (int)((double)cbTarget/AdvanceHopperGrass*100);
    GlobalI += AdvanceHopperGrass;
    printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n",(int)((double)cbTarget/AdvanceHopperGrass*100), AdvanceHopperGrass);

    return NULL;
}



// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
/*
 * Railgun_Quadruplet_7_count_hits: hit-counting (benchmark) variant of
 * Railgun_Quadruplet_7.
 *
 *   pbTarget, cbTarget   - bytes to search in and their count (no NUL needed)
 *   pbPattern, cbPattern - bytes to search for and their count
 *
 * Every occurrence found (overlapping ones included) increments the global
 * Railgunhits. The function itself always returns NULL.
 *
 * The search strategy depends on the sizes:
 *   - cbPattern == 0 or cbPattern > cbTarget: nothing is counted.
 *   - cbPattern == 1: plain byte scan.
 *   - cbPattern 2..3: compare first/last (and middle) byte, and hop one extra
 *     position when the next window cannot start with the first pattern byte.
 *   - cbPattern >= 4 and cbTarget < 961: compare the leading four bytes as one
 *     32-bit word, then hop over positions that cannot start a match. 961 is
 *     the original author's arbitrary cut-over point.
 *   - otherwise: Boyer-Moore-Horspool, pre-filtered by the leading four bytes.
 *     Only this branch updates GlobalSP/GlobalI and prints the
 *     "Skip-Performance" line.
 *
 * All bytes are handled as unsigned char, so bytes >= 0x80 are safe as table
 * indexes, and the four-byte word is read with memcpy (exactly four bytes, no
 * alignment or aliasing requirements).
 */
char * Railgun_Quadruplet_7_count_hits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    const unsigned char * target = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned char first;        /* pattern[0]; a window can only match if it starts with it */
    uint32_t patternHead;       /* first four pattern bytes (cbPattern >= 4 only) */
    unsigned long end;          /* offset one past the window being examined */

    if (cbPattern > cbTarget || cbPattern == 0)
        return(NULL);

    first = pattern[0];

    if (cbPattern == 1) {
        for (end = 0; end < cbTarget; end++) {
            if (target[end] == first)
                Railgunhits++;
        }
        return(NULL);
    }

    if (cbPattern < 4) {
        const unsigned char last = pattern[cbPattern-1];

        end = cbPattern;
        if (cbPattern == 3) {
            const unsigned char middle = pattern[1];

            for ( ;; )
            {
                if (target[end-3] == first && target[end-1] == last && target[end-2] == middle)
                    Railgunhits++;
                /* The window starting one byte later begins with target[end-2]:
                   if that is not the first pattern byte, hop over it. */
                if (target[end-2] != first) end++;
                end++;
                if (end > cbTarget)
                    return(NULL);
            }
        }
        for ( ;; )
        {
            if (target[end-2] == first && target[end-1] == last)
                Railgunhits++;
            /* Same hop as above for two-byte patterns. */
            if (target[end-1] != first) end++;
            end++;
            if (end > cbTarget)
                return(NULL);
        }
    }

    /* From here on cbPattern >= 4. */
    memcpy(&patternHead, pattern, sizeof patternHead);

    if (cbTarget < 961)
    {
        end = cbPattern;
        for ( ;; )
        {
            const unsigned char * window = target + (end - cbPattern);
            uint32_t windowHead;
            unsigned long skip = 0;   /* extra positions that provably cannot match */
            unsigned long k;

            memcpy(&windowHead, window, sizeof windowHead);

            if (windowHead == patternHead) {
                /* Verify the rest; while doing so, count the unbroken run of
                   matching bytes right after window[0] that differ from 'first':
                   no match can start on any of them. */
                for (k = 1; k < cbPattern && pattern[k] == window[k]; k++) {
                    if (skip == k-1 && window[k] != first) skip++;
                }
                if (k == cbPattern) Railgunhits++;
            } else if (window[1] != first) {
                skip++;
                if (window[2] != first) {
                    skip++;
                    if (window[3] != first) skip++;
                }
            }

            end += skip + 1;
            if (end > cbTarget)
                return(NULL);
        }
    }

    {
        unsigned long bm_bc[ASIZE];      /* Horspool bad-character shifts */
        unsigned long iterations = 0;    /* number of windows examined */
        unsigned long i;
        unsigned long a;
        int skipPercent;

        /* Preprocessing */
        for (a = 0; a < ASIZE; a++) bm_bc[a] = cbPattern;
        for (a = 0; a < cbPattern-1; a++) bm_bc[pattern[a]] = cbPattern-a-1;

        /* Searching */
        i = 0;
        while (i <= cbTarget-cbPattern) {
            uint32_t windowHead;

            memcpy(&windowHead, target+i, sizeof windowHead);
            if (windowHead == patternHead && memcmp(pattern+4, target+i+4, cbPattern-4) == 0)
                Railgunhits++;
            i += bm_bc[target[i+cbPattern-1]];
            iterations++;
        }

        /* cbPattern <= cbTarget guarantees at least one iteration here. */
        skipPercent = (int)((double)cbTarget/iterations*100);
        GlobalSP += skipPercent;
        GlobalI += iterations;
        printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, iterations);

        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]





// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
/*
 * Railgun_Quadruplet_7: find the first occurrence of a byte pattern.
 *
 *   pbTarget, cbTarget   - bytes to search in and their count (no NUL needed)
 *   pbPattern, cbPattern - bytes to search for and their count
 *
 * Returns a pointer to the first occurrence inside pbTarget, or NULL when
 * there is none (including cbPattern > cbTarget). An empty pattern matches at
 * pbTarget.
 *
 * The search strategy depends on the sizes:
 *   - cbPattern == 1: memchr.
 *   - cbPattern 2..3: compare first/last (and middle) byte, and hop one extra
 *     position when the next window cannot start with the first pattern byte.
 *   - cbPattern >= 4 and cbTarget < 961: compare the leading four bytes as one
 *     32-bit word, then hop over positions that cannot start a match. 961 is
 *     the original author's arbitrary cut-over point.
 *   - otherwise: Boyer-Moore-Horspool, pre-filtered by the leading four bytes.
 *     This branch increments the global GlobalI once per window that did not
 *     match.
 *
 * All bytes are handled as unsigned char, so bytes >= 0x80 are safe as table
 * indexes, and the four-byte word is read with memcpy (exactly four bytes, no
 * alignment or aliasing requirements).
 */
char * Railgun_Quadruplet_7 (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    const unsigned char * target = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned char first;        /* pattern[0]; a window can only match if it starts with it */
    uint32_t patternHead;       /* first four pattern bytes (cbPattern >= 4 only) */
    unsigned long end;          /* offset one past the window being examined */

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);

    first = pattern[0];

    if (cbPattern == 1)
        return((char *)memchr(pbTarget, first, cbTarget));

    if (cbPattern < 4) {
        const unsigned char last = pattern[cbPattern-1];

        end = cbPattern;
        if (cbPattern == 3) {
            const unsigned char middle = pattern[1];

            for ( ;; )
            {
                if (target[end-3] == first && target[end-1] == last && target[end-2] == middle)
                    return(pbTarget + (end-3));
                /* The window starting one byte later begins with target[end-2]:
                   if that is not the first pattern byte, hop over it. */
                if (target[end-2] != first) end++;
                end++;
                if (end > cbTarget)
                    return(NULL);
            }
        }
        for ( ;; )
        {
            if (target[end-2] == first && target[end-1] == last)
                return(pbTarget + (end-2));
            /* Same hop as above for two-byte patterns. */
            if (target[end-1] != first) end++;
            end++;
            if (end > cbTarget)
                return(NULL);
        }
    }

    /* From here on cbPattern >= 4. */
    memcpy(&patternHead, pattern, sizeof patternHead);

    if (cbTarget < 961)
    {
        end = cbPattern;
        for ( ;; )
        {
            const unsigned char * window = target + (end - cbPattern);
            uint32_t windowHead;
            unsigned long skip = 0;   /* extra positions that provably cannot match */
            unsigned long k;

            memcpy(&windowHead, window, sizeof windowHead);

            if (windowHead == patternHead) {
                /* Verify the rest; while doing so, count the unbroken run of
                   matching bytes right after window[0] that differ from 'first':
                   no match can start on any of them. */
                for (k = 1; k < cbPattern && pattern[k] == window[k]; k++) {
                    if (skip == k-1 && window[k] != first) skip++;
                }
                if (k == cbPattern)
                    return(pbTarget + (end - cbPattern));
            } else if (window[1] != first) {
                skip++;
                if (window[2] != first) {
                    skip++;
                    if (window[3] != first) skip++;
                }
            }

            end += skip + 1;
            if (end > cbTarget)
                return(NULL);
        }
    }

    {
        unsigned long bm_bc[ASIZE];      /* Horspool bad-character shifts */
        unsigned long i;
        unsigned long a;

        /* Preprocessing */
        for (a = 0; a < ASIZE; a++) bm_bc[a] = cbPattern;
        for (a = 0; a < cbPattern-1; a++) bm_bc[pattern[a]] = cbPattern-a-1;

        /* Searching */
        i = 0;
        while (i <= cbTarget-cbPattern) {
            uint32_t windowHead;

            memcpy(&windowHead, target+i, sizeof windowHead);
            if (windowHead == patternHead && memcmp(pattern+4, target+i+4, cbPattern-4) == 0)
                return(pbTarget+i);
            i += bm_bc[target[i+cbPattern-1]];
            GlobalI++;
        }
        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]



// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
/*
 * Railgun: find the first occurrence of a byte pattern.
 *
 *   pbTarget, cbTarget   - bytes to search in and their count (no NUL needed)
 *   pbPattern, cbPattern - bytes to search for and their count
 *
 * Returns a pointer to the first occurrence inside pbTarget, or NULL when
 * there is none (including cbPattern > cbTarget). An empty pattern matches at
 * pbTarget and a one-byte pattern is delegated to memchr.
 *
 * For cbTarget < 961 (the original author's arbitrary cut-over point) every
 * window is tested Karp-Rabin style: first and last byte act as a cheap
 * filter and the bytes in between are compared only when both agree.
 * Larger targets use Boyer-Moore-Horspool with the same first/last filter.
 *
 * Bytes are handled as unsigned char so that bytes >= 0x80 index the shift
 * table correctly.
 */
char * Railgun (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    const unsigned char * target = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned char first, last;   /* filter bytes: pattern[0] and pattern[cbPattern-1] */
    unsigned long interior;      /* number of bytes strictly between first and last */
    unsigned long i;

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);
    if (cbPattern == 1)
        return((char *)memchr(pbTarget, pattern[0], cbTarget));

    first = pattern[0];
    last = pattern[cbPattern-1];
    interior = cbPattern-2;

    if (cbTarget < 961)
    {
        /* i is the start of the window; slide one byte at a time. */
        for (i = 0; i + cbPattern <= cbTarget; i++) {
            if (target[i] == first && target[i+cbPattern-1] == last
                && memcmp(pattern+1, target+i+1, interior) == 0)
                return(pbTarget+i);
        }
        return(NULL);
    }
    else
    {
        unsigned long bm_bc[ASIZE];   /* Horspool bad-character shifts */
        unsigned long a;
        unsigned char ch;

        /* Preprocessing */
        for (a = 0; a < ASIZE; a++) bm_bc[a] = cbPattern;
        for (a = 0; a < cbPattern-1; a++) bm_bc[pattern[a]] = cbPattern-a-1;

        /* Searching */
        i = 0;
        while (i <= cbTarget-cbPattern) {
            ch = target[i+cbPattern-1];
            /* Borrowed from Karp-Rabin: do the slower compare only when the
               window "looks like" the pattern. */
            if (ch == last && target[i] == first
                && memcmp(pattern+1, target+i+1, interior) == 0)
                return(pbTarget+i);
            i += bm_bc[ch];
        }
        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Railgun_totalhits [
/*
 * Railgun_totalhits: hit-counting (benchmark) variant of Railgun.
 *
 *   pbTarget, cbTarget   - bytes to search in and their count (no NUL needed)
 *   pbPattern, cbPattern - bytes to search for and their count
 *
 * Every occurrence found (overlapping ones included) increments the global
 * Railgunhits. The function itself always returns NULL. Nothing is counted
 * for an empty pattern or when cbPattern > cbTarget.
 *
 * For cbTarget < 961 (the original author's arbitrary cut-over point) every
 * window is tested with a first/last byte filter followed by a compare of the
 * bytes in between; larger targets use Boyer-Moore-Horspool with the same
 * filter. Bytes are handled as unsigned char so that bytes >= 0x80 index the
 * shift table correctly.
 */
char * Railgun_totalhits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    const unsigned char * target = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned char first, last;   /* filter bytes: pattern[0] and pattern[cbPattern-1] */
    unsigned long interior;      /* number of bytes strictly between first and last */
    unsigned long i;

    if (cbPattern > cbTarget || cbPattern == 0)
        return(NULL);

    first = pattern[0];

    if (cbPattern == 1) {
        for (i = 0; i < cbTarget; i++) {
            if (target[i] == first)
                Railgunhits++;
        }
        return(NULL);
    }

    last = pattern[cbPattern-1];
    interior = cbPattern-2;

    if (cbTarget < 961)
    {
        /* i is the start of the window; slide one byte at a time. */
        for (i = 0; i + cbPattern <= cbTarget; i++) {
            if (target[i] == first && target[i+cbPattern-1] == last
                && memcmp(pattern+1, target+i+1, interior) == 0)
                Railgunhits++;
        }
        return(NULL);
    }
    else
    {
        unsigned long bm_bc[ASIZE];   /* Horspool bad-character shifts */
        unsigned long a;
        unsigned char ch;

        /* Preprocessing */
        for (a = 0; a < ASIZE; a++) bm_bc[a] = cbPattern;
        for (a = 0; a < cbPattern-1; a++) bm_bc[pattern[a]] = cbPattern-a-1;

        /* Searching */
        i = 0;
        while (i <= cbTarget-cbPattern) {
            ch = target[i+cbPattern-1];
            if (ch == last && target[i] == first
                && memcmp(pattern+1, target+i+1, interior) == 0)
                Railgunhits++;
            i += bm_bc[ch];
        }
        return(NULL);
    }
}
// ### Railgun_totalhits ]



//#define MAX(a,b)  (((a) > (b)) ? (a) : (b))

/*
 * Computing of the maximal suffix for <=.
 *
 *   x - pattern, m - its length, p - out: period found for that suffix.
 *
 * Returns the index just before the start of the maximal suffix (-1 when the
 * whole of x is the maximal suffix). Bytes are compared as plain 'char'.
 */
int maxSuf(char *x, int m, int *p) {
   int best = -1;      /* start of the current maximal suffix, minus one */
   int cand = 0;       /* start of the candidate suffix, minus one */
   int off = 1;        /* offset compared inside both suffixes */

   *p = 1;
   while (cand + off < m) {
      const char c = x[cand + off];
      const char ref = x[best + off];

      if (c == ref) {
         if (off == *p) {
            /* A whole period matched: move the candidate on by one period. */
            cand += *p;
            off = 1;
         }
         else {
            ++off;
         }
      }
      else if (c < ref) {
         /* Candidate is smaller: the current best stays, its period grows. */
         cand += off;
         off = 1;
         *p = cand - best;
      }
      else {
         /* Candidate is larger: it becomes the new best suffix. */
         best = cand;
         cand = best + 1;
         off = 1;
         *p = 1;
      }
   }
   return(best);
}
 
/*
 * Computing of the maximal suffix for >= (same as maxSuf with the byte
 * ordering reversed).
 *
 *   x - pattern, m - its length, p - out: period found for that suffix.
 *
 * Returns the index just before the start of that suffix (-1 when the whole
 * of x qualifies). Bytes are compared as plain 'char'.
 */
int maxSufTilde(char *x, int m, int *p) {
   int best = -1;      /* start of the current maximal suffix, minus one */
   int cand = 0;       /* start of the candidate suffix, minus one */
   int off = 1;        /* offset compared inside both suffixes */

   *p = 1;
   while (cand + off < m) {
      const char c = x[cand + off];
      const char ref = x[best + off];

      if (c == ref) {
         if (off == *p) {
            /* A whole period matched: move the candidate on by one period. */
            cand += *p;
            off = 1;
         }
         else {
            ++off;
         }
      }
      else if (c > ref) {
         /* Under the reversed order the current best stays, its period grows. */
         cand += off;
         off = 1;
         *p = cand - best;
      }
      else {
         /* Under the reversed order the candidate becomes the new best. */
         best = cand;
         cand = best + 1;
         off = 1;
         *p = 1;
      }
   }
   return(best);
}


/*
 * Two Way string matching algorithm (hit-counting variant).
 *
 *   y, n - text and its length
 *   x, m - pattern and its length
 *
 * Each occurrence increments the global Railgunhits. After the scan the
 * function adds its skip statistics to GlobalSP/GlobalI and prints one
 * "Skip-Performance" line: text length divided by the number of loop
 * iterations, as a percentage. When no window fits (n < m) the percentage is
 * reported as 0 instead of dividing by zero.
 *
 * Returns y unconditionally; the value carries no match information.
 */
char * TW(char *y, char *x, int n, int m) {
   int i, j, ell, memory, p, per, q;
   int skipPercent;

    unsigned long  AdvanceHopperGrass=0;   /* number of windows examined */

   /* Preprocessing: pick the critical position from the two maximal suffixes. */
   i = maxSuf(x, m, &p);
   j = maxSufTilde(x, m, &q);
   if (i > j) {
      ell = i;
      per = p;
   }
   else {
      ell = j;
      per = q;
   }

   /* Searching */
   if (memcmp(x, x + per, ell + 1) == 0) {
      /* Periodic pattern: 'memory' remembers the prefix already known to match. */
      j = 0;
      memory = -1;
      while (j <= n - m) {
         i = MAX(ell, memory) + 1;
         while (i < m && x[i] == y[i + j])
            ++i;
         if (i >= m) {
            i = ell;
            while (i > memory && x[i] == y[i + j])
               --i;
            if (i <= memory) Railgunhits++; //return (y+j);
            j += per;
            memory = m - per - 1;
         }
         else {
            j += (i - ell);
            memory = -1;
         }
         AdvanceHopperGrass++;
      }
   }
   else {
      /* Non-periodic pattern: a fixed shift is used after the right half matched. */
      per = MAX(ell + 1, m - ell - 1) + 1;
      j = 0;
      while (j <= n - m) {
         i = ell + 1;
         while (i < m && x[i] == y[i + j])
            ++i;
         if (i >= m) {
            i = ell;
            while (i >= 0 && x[i] == y[i + j])
               --i;
            if (i < 0) Railgunhits++; //return (y+j);
            j += per;
         }
         else
            j += (i - ell);
         AdvanceHopperGrass++;
      }
   }

   /* Guard the division: the loops above do not run at all when n < m. */
   skipPercent = AdvanceHopperGrass ? (int)((double)n/AdvanceHopperGrass*100) : 0;
   GlobalSP += skipPercent;
   GlobalI += AdvanceHopperGrass;
   printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, AdvanceHopperGrass);
   return (y); // a dummy return, since this variant count hits only!
}


/*
 * Build the Boyer-Moore bad-character table for pattern x of length m.
 *
 * bmBc must have ASIZE entries. Each entry starts at m; for every pattern
 * byte except the last, its entry becomes the distance from that byte's
 * rightmost such position to the end of the pattern.
 * The table is indexed by the byte value as unsigned char, so bytes >= 0x80
 * stay inside the table even where plain char is signed.
 */
void preBmBc(char *x, int m, int bmBc[]) {
   int i;
 
   for (i = 0; i < ASIZE; ++i)
      bmBc[i] = m;
   for (i = 0; i < m - 1; ++i)
      bmBc[(unsigned char)x[i]] = m - i - 1;
}
 
 
/*
 * Fill suff[0..m-1] for pattern x of length m: suff[i] is the length of the
 * longest string that is both a suffix of x[0..i] and a suffix of x
 * (so suff[m-1] == m).
 *
 * Nothing is written when m <= 0.
 */
void suffixes(char *x, int m, int *suff) {
   int f = 0;   /* position whose suffix run was last computed explicitly */
   int g;       /* one before the leftmost byte covered by that run */
   int i;
 
   if (m <= 0)
      return;

   suff[m - 1] = m;
   g = m - 1;
   for (i = m - 2; i >= 0; --i) {
      /* Inside a previously matched run the answer can be copied from the
         mirrored position, as long as it does not reach the run's left edge.
         (i > g is false on the first pass, so f is always set before use.) */
      if (i > g && suff[i + m - 1 - f] < i - g)
         suff[i] = suff[i + m - 1 - f];
      else {
         if (i < g)
            g = i;
         f = i;
         while (g >= 0 && x[g] == x[g + m - 1 - f])
            --g;
         suff[i] = f - g;
      }
   }
}
 
/*
 * Build the Boyer-Moore good-suffix table bmGs[0..m-1] for pattern x of
 * length m, using the suffix lengths computed by suffixes().
 * The scratch array holds XSIZE entries, so m must not exceed XSIZE.
 */
void preBmGs(char *x, int m, int bmGs[]) {
   int suff[XSIZE];
   int pos;                    /* pattern position being processed */
   int fill = 0;               /* next bmGs slot the prefix pass may set */
   const int lastIdx = m - 1;
 
   suffixes(x, m, suff);
 
   /* Default: shift by the whole pattern length. */
   pos = 0;
   while (pos < m) {
      bmGs[pos] = m;
      pos++;
   }

   /* Prefix pass: where x[0..pos] is also a suffix of x, shorten every
      still-default shift to the left of the matching position. */
   for (pos = lastIdx; pos >= 0; pos--) {
      if (suff[pos] != pos + 1)
         continue;
      while (fill < lastIdx - pos) {
         if (bmGs[fill] == m)
            bmGs[fill] = lastIdx - pos;
         fill++;
      }
   }

   /* Inner-occurrence pass: later (larger pos) entries overwrite earlier ones. */
   for (pos = 0; pos < lastIdx; pos++)
      bmGs[lastIdx - suff[pos]] = lastIdx - pos;
}
 

/*
 * Boyer-Moore string matching (hit-counting variant).
 *
 *   y, n - text and its length
 *   x, m - pattern and its length; must satisfy 1 <= m <= XSIZE because the
 *          good-suffix tables are fixed-size arrays of XSIZE entries
 *
 * Each occurrence increments the global Railgunhits. After the scan the
 * function adds its skip statistics to GlobalSP/GlobalI and prints one
 * "Skip-Performance" line: text length divided by the number of loop
 * iterations, as a percentage (0 when no window fits, i.e. n < m).
 *
 * Always returns NULL. A pattern length outside 1..XSIZE returns NULL
 * immediately, without searching or printing.
 */
char * BM(char *y, char *x, int n, int m ) {

   int i, j, bmGs[XSIZE], bmBc[ASIZE];
   int skipPercent;
   unsigned long  AdvanceHopperGrass=0;   /* number of windows examined */
 
   if (m <= 0 || m > XSIZE)
      return(NULL);

   /* Preprocessing */
   preBmGs(x, m, bmGs);
   preBmBc(x, m, bmBc);
 
   /* Searching */
   j = 0;
   while (j <= n - m) {
      /* Compare right to left; i ends at the mismatch position or -1. */
      for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
      if (i < 0) {
         Railgunhits++; //return(y+j); //OUTPUT(j);
         j += bmGs[0];
      }
      else
         /* Index the bad-character table by the unsigned byte value. */
         j += MAX(bmGs[i], bmBc[(unsigned char)y[i + j]] - m + 1 + i);
      AdvanceHopperGrass++;
   }

   /* Guard the division: the loop does not run at all when n < m. */
   skipPercent = AdvanceHopperGrass ? (int)((double)n/AdvanceHopperGrass*100) : 0;
   GlobalSP += skipPercent;
   GlobalI += AdvanceHopperGrass;
   printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, AdvanceHopperGrass);

   return(NULL);
}





// ### Brute force 'Dummy' algorithm [
   /*
    * Brute-force search (hit-counting variant): tests every window of the
    * text y (length n) against the pattern x (length m).
    *
    * Each occurrence increments the global Railgunhits. After the scan the
    * function adds its skip statistics to GlobalSP/GlobalI and prints one
    * "Skip-Performance" line: text length divided by the number of windows
    * tested, as a percentage (0 when no window fits, i.e. n < m).
    *
    * Always returns NULL.
    */
   char * Brute_Force_Dummy(char *y, char *x, long n, int m) {
    long i, j;
    int skipPercent;
    unsigned long  AdvanceHopperGrass=0;   /* number of windows tested */
  
    /* Searching */
    for (i=0; i <= n-m; i++) {
       j=0;
       while (j < m && y[i+j] == x[j]) j++;
       if (j >= m) Railgunhits++; //return(y+i);
       AdvanceHopperGrass++;
    }

    /* Guard the division: the loop does not run at all when n < m. */
    skipPercent = AdvanceHopperGrass ? (int)((double)n/AdvanceHopperGrass*100) : 0;
    GlobalSP += skipPercent;
    GlobalI += AdvanceHopperGrass;
    printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, AdvanceHopperGrass);

    return(NULL);
   }
// ### Brute force 'Dummy' algorithm ]



















   /*
    * Hit-counting search that shifts by the larger of two bad-character
    * shifts: a Horspool-style one keyed on the last byte of the window
    * (bm_bc) and a Quick-Search-style one keyed on the byte just after the
    * window (qs_bc).
    *
    *   y, n - text and its length
    *   x, m - pattern and its length (m <= 0 returns NULL immediately)
    *
    * Each occurrence increments the global Railgunhits. After the scan the
    * function adds its skip statistics to GlobalSP/GlobalI and prints one
    * "Skip-Performance" line (0% when no window fits, i.e. n < m).
    *
    * Always returns NULL.
    */
   char * SMITH_Boyer_Moore(char *y, char *x, long n, int m)
   {
    const unsigned char *text = (const unsigned char *)y;
    const unsigned char *pat = (const unsigned char *)x;
    long i;
    int bm_bc[ASIZE], qs_bc[ASIZE];
    int skipPercent;
    unsigned long  AdvanceHopperGrass=0;   /* number of windows examined */

    if (m <= 0)
       return(NULL);
   
    /* Preprocessing; tables are indexed by unsigned byte value. */
    for (i=0; i < ASIZE; i++) {
       bm_bc[i]=m;
       qs_bc[i]=m+1;
    }
    for (i=0; i < m-1; ++i) {
       bm_bc[pat[i]]=m-i-1;
       qs_bc[pat[i]]=m-i;
    }
    qs_bc[pat[m-1]]=1;

    /* Searching */
    i=0;
    while (i <= n-m) {
       if (memcmp(&text[i], pat, m) == 0) Railgunhits++; //return(i);
       AdvanceHopperGrass++;
       /* On the last window text[i+m] would lie one past the text, and any
          shift ends the loop anyway, so stop without reading it. */
       if (i == n-m)
          break;
       i+=MAX(bm_bc[text[i+m-1]], qs_bc[text[i+m]]);
    }

    /* Guard the division: the loop does not run at all when n < m. */
    skipPercent = AdvanceHopperGrass ? (int)((double)n/AdvanceHopperGrass*100) : 0;
    GlobalSP += skipPercent;
    GlobalI += AdvanceHopperGrass;
    printf("Skip-Performance(bigger-the-better): %d%%, %lu skips/iterations\n", skipPercent, AdvanceHopperGrass);

    return(NULL);
   }










// ### Karp-Rabin-Kaze_BOOSTED algorithm [
/*
 * KarpRabinKaze_BOOSTED - finds the first occurrence of a pattern in a text.
 *
 * A cheap 16-bit "hash" built from the first and last byte of the pattern is
 * compared against the same two bytes of each text window; only when they
 * agree is the slower memcmp executed.
 *
 *   pbTarget,  cbTarget  - text to search and its length in bytes
 *   pbPattern, cbPattern - pattern and its length in bytes
 *
 * Returns a pointer to the first match inside pbTarget, or NULL when there is
 * no match or the pattern is longer than the text. An empty pattern matches
 * at the start of the text (strstr convention).
 */
char * KarpRabinKaze_BOOSTED (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    /* Work on unsigned bytes: shifting a negative plain char left is undefined. */
    const unsigned char *pattern = (const unsigned char *)pbPattern;
    const unsigned char *text = (const unsigned char *)pbTarget;
    unsigned long  ulHashPattern;
    unsigned long  lastStart;
    unsigned long  pos;

    if (cbPattern > cbTarget)
        return(NULL);

    /* Without this guard the hash would read pbPattern[-1] and pbTarget[-1]. */
    if (cbPattern == 0)
        return(pbTarget);

    ulHashPattern = ( (unsigned long)pattern[0]<<8 ) + pattern[cbPattern-1];
    lastStart = cbTarget - cbPattern;   /* last offset at which a window still fits */

    for (pos = 0; pos <= lastStart; pos++)
    {
        const unsigned char *window = text + pos;

        // Kaze: an additional first-byte pre-check before the hash was tried and FAILED (314KB/clock versus 370KB/clock for the hash + memcmp form used here).
        if ( (ulHashPattern == ( (unsigned long)window[0]<<8 ) + window[cbPattern-1]) && !memcmp(pattern, window, (size_t)cbPattern) )
            return(pbTarget + pos);
    }
    return(NULL);
}
// ### Karp-Rabin-Kaze_BOOSTED algorithm ]

/*
 * strstr_Microsoft - straightforward strstr: for every start position in
 * str1, compare byte by byte against str2 until a mismatch or a terminator.
 *
 * Returns a pointer to the first occurrence of str2 inside str1, str1 itself
 * when str2 is empty, or NULL when str2 does not occur.
 */
char * strstr_Microsoft (
        const char * str1,
        const char * str2
        )
{
        const char *start;

        /* An empty needle matches at the very beginning. */
        if ( *str2 == '\0' )
            return((char *)str1);

        for (start = str1; *start != '\0'; ++start)
        {
                const char *hay = start;
                const char *ndl = str2;

                /* Walk both strings while they agree and neither has ended. */
                while ( *hay != '\0' && *ndl != '\0' && *hay == *ndl )
                {
                        ++hay;
                        ++ndl;
                }

                /* Needle exhausted: the whole of it matched at 'start'. */
                if ( *ndl == '\0' )
                        return((char *)start);
        }
        return(NULL);
}

/*
 * strstr_GNU_C_Library - goto-driven strstr in the style of the classic
 * GNU C Library implementation.
 *
 * Strategy: scan for the first needle byte (b), then require the second
 * needle byte (c) right after it, and only then compare the remainder of the
 * needle (label 'crest'). On a failed comparison the scan resumes from the
 * byte after the candidate.
 *
 * Returns a pointer to the first occurrence of pneedle in phaystack,
 * phaystack itself when pneedle is empty, or a null pointer when absent.
 *
 * All working characters are unsigned char: they are compared against bytes
 * loaded through 'const unsigned char *', and with a signed plain char a
 * byte >= 0x80 would never compare equal to itself in the 'crest' section.
 */
char *
strstr_GNU_C_Library (const char *phaystack, const char *pneedle)
{
  const unsigned char *haystack, *needle;
  unsigned char b;
  const unsigned char *rneedle;

  haystack = (const unsigned char *) phaystack;

  if ((b = *(needle = (const unsigned char *) pneedle)))
    {
      unsigned char c;

      /* Find the first occurrence of b; haystack is left pointing at it.
         (No pointer is ever formed before the start of the buffer.) */
      for (;; ++haystack)
        {
          unsigned char a = *haystack;
          if (!a)
            goto ret0;
          if (a == b)
            break;
        }

      /* One-byte needle: the byte just found is the match. */
      if (!(c = *++needle))
        goto foundneedle;
      ++needle;                 /* needle now points at the third needle byte */
      goto jin;

      for (;;)
        {
          {
            unsigned char a;
            if (0)
            jin:{
                /* Entry point right after b was found: is the next byte c? */
                if ((a = *++haystack) == c)
                  goto crest;
              }
            else
              a = *++haystack;
            do
              {
                /* Scan forward (two bytes per iteration) until b is seen. */
                for (; a != b; a = *++haystack)
                  {
                    if (!a)
                      goto ret0;
                    if ((a = *++haystack) == b)
                      break;
                    if (!a)
                      goto ret0;
                  }
              }
            while ((a = *++haystack) != c);
          }
        crest:
          /* Here haystack points at the byte that matched c. */
          {
            unsigned char a;
            {
              const unsigned char *rhaystack;
              /* Step haystack back onto the candidate start and compare the
                 rest of the needle (from its third byte) with the text. */
              if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
                do
                  {
                    if (!a)
                      goto foundneedle;
                    if (*++rhaystack != (a = *++needle))
                      break;
                    if (!a)
                      goto foundneedle;
                  }
                while (*++rhaystack == (a = *++needle));
              needle = rneedle; /* took the register-poor approach */
            }
            /* a == 0 means the needle ended at the mismatch: full match. */
            if (!a)
              break;
          }
        }
    }
foundneedle:
  return (char *) haystack;
ret0:
  return 0;
}



int main( int argc, char *argv[] )
  {
FILE *fp_inLINE;
FILE *fp_in;
FILE *fp_in2;
FILE *fp_out;
FILE *fp_out2;
FILE *fp_out3;
long size_in, size_in2;
int Bozan;
long ThunderwithL, ThunderwithR;
char *Strng;
long Strnglen;
long StrnglenTRAVERSED;
char Pattern[20+2000]; // skillessness=12 human consciousness=19 I should have known=19
// In the East, enlightenment is described as a state of ultimate=62
int Patternlen;      
long LinesEncountered=0;
long BruteForceDummyhits=0;
long KarpRabinKazehits=0;
long KarpRabinKaze_BOOSTEDhits=0;
long Karp_Rabin_Kaze_4_OCTETShits=0;
long Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
long KarpRabinhits=0;
long HORSPOOLhits=0;
long HORSPOOL_Kazehits=0;
long strstrMicrosofthits=0;
long strstrGNUCLibraryhits=0;
long Railgun_Quadruplet_6pp_GO=0;
long Railgun_Quadruplet_88=0;
long BNDM32=0;
long Railgun_Quadruplet_6pp_GOtws=0;
long Railgun_Quadruplet_6pp_GOtwl=0;

long dummy;
unsigned long long ReallyTraversed;
      char llTOaDigits[27]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)

int i, j;
char *DumboBox[8][2] = { "an\0", "to\0",
                         "TDK\0", "the\0",
                         "fast\0", "easy\0",
                         "grmbl\0", "email\0",
                         "pasting\0", "amazing\0",
                         "underdog\0", "superdog\0",
                         "participants\0", "skillessness\0",
                         "I should have known\0", "human consciousness\0"
};


char *Dumbino[52] = { 
"fast\0",
"from\0",
"SR71\0",
"SONY\0",
"Lexx\0",
"Rabea\0",
"makes\0",
"punny\0",
"funny\0",
"Orion\0",
"lemon\0",
"Hi-Fi\0",
"Tesla\0",
"Apache\0",
"monkey\0",
"ramjet\0",
"fallen\0",
"Albert\0",
"Toshiba\0",
"grammar\0",
"pharaoh\0",
"decades\0",
"#define\0",
"Delerium\0",
"numberly\0",
"stellify\0",
"elephant\0",
"Layalina\0",
"Fibonacci\0",
"profanism\0",
"butterfly\0",
"Beth Ditto\0",
"Got Nuffin\0",
"fly fly fly\0",
"quintillion\0",
"Sick Of Love\0",
"Love Lockdown\0",
"Dannii Minogue\0",
"299,792,458 m/s\0",
"Extreme-Fidelity\0",
"I should have known\0",
"human consciousness\0",
"Truth's your only fear\0",
"Never thought I'd do it\0",
"innocence over fading fast\0",
"I hit the ground runnin' free\0",
"I colour out the darkest clouds\0",
"But he's looking right through me\0",
"I'm living in an age that calls darkness light\0",
"Following the wanderings of a dream - a dream that keeps my soul alive\0",
"I notice what matters and I got nothing to lose but darkness and shadows\0",
"Then, singing among the savage branches, it impales itself upon the longest, sharpest spine. And, dying, it rises above its own agony to outcarol the lark and the nightingale.\0"
};

char *BULLY12to48[25] = { 
"you have read\0",
"you should have\0",
"If you have read\0",
"ideas and vision\0",
"have read through\0",
"preceding chapters\0",
"a pretty good idea\0",
"and its techniques\0",
"idea on how to make\0",
"is the largest ever\0",
"The purpose of this\0",
"electronic repository\0",
"much more, a complete,\0",
"many people as possible\0",
"understanding and knowledge\0",
"and to make them available to as\0",
"What use you will put it to is up to you.\0",
"CD-ROM is to provide access to Osho's words\0",
"world view of the New Man and a new way of life\0",
"TGAGCACATTGAGAGCCTCCAAGGCATGGAGT\0",
"TTGAGAGCCTCCAAGGCATGGAGT\0",
"AGCCTCCAAGGCATGGAGT\0",
"CCAAGGCATGGAGT\0",
"AAGGCATGGAGT\0",
"Premium Full Leaf Tea\0",
};

long FoundIn;
char *FoundInPTR;

// " &quot;
// & &amp;
// < &lt;
// > &gt;
//  &nbsp;

      char workbyte;
      char workK[1024*128];
      long workKoffset = -1;
      char wrd[MaxWrdLen+2];  // 0..30, 31 = 0
      char wrdLOWER[MaxWrdLen+2];  // 0..30, 31 = 0
      char *BOLDl = "<u><b><big>\0";
      char *BOLDr = "</big></b></u>\0";
      unsigned wrdlen;

char *UnacceptableInHTMLq[1] = { 
"&quot;\0",};
char *UnacceptableInHTMLa[1] = { 
"&amp;\0",};
char *UnacceptableInHTMLl[1] = { 
"&lt;\0",};
char *UnacceptableInHTMLg[1] = { 
"&gt;\0",};
char *UnacceptableInHTMLs[1] = { 
"&nbsp;\0",};
int GulliverFlag;
unsigned long FunctionInvocations=0;
unsigned long TotalLenghtOfPatterns=0;

// SKYFALL_TXT2HTML.c [
printf("SKYFALL_TXT2HTML, revision 2, written by Kaze.\n");

if (argc != 3 && argc != 4) {
printf("Usage: SKYFALL_TXT2HTML incoming-text-file incoming-familiar-wordlist-file [/Gulliver|/Hasherezade]\n");
printf("Note1: The bolded TXT file goes to SKYFALL_TXT2HTML.txt - the BODY of needed HTML file.\n");
printf("Note2: Words in range 1..%d are processed only!\n", MaxWrdLen);
printf("Note3: Optional /Gulliver replaces the default Railgun_7 (Horspool order 1) function.\n");
printf("Note4: Optional /Hasherezade replaces the default Railgun_7 (Horspool order 1) function.\n");
exit (1);
}

GulliverFlag=0;
if ( argc == 4 && strcmp("/Gulliver\0",argv[3]) == 0 ) GulliverFlag=1;
if ( argc == 4 && strcmp("/Hasherezade\0",argv[3]) == 0 ) GulliverFlag=2;

Strng = (char *)malloc( 220*1024*1024 );
if( Strng == NULL )
{ puts( "SKYFALL_TXT2HTML: Needed memory allocation denied!\n" ); return( 1 ); }

if( ( fp_in = fopen( argv[1], "rb" ) ) == NULL )
{ printf( "SKYFALL_TXT2HTML: Can't open file %s \n", argv[1] ); return( 1 ); }

fseek( fp_in, 0L, SEEK_END );
size_in = ftell( fp_in );
fseek( fp_in, 0L, SEEK_SET );
printf( "Size of 1st input file: %lu\n", size_in );

if( ( fp_in2 = fopen( argv[2], "rb" ) ) == NULL )
{ printf( "SKYFALL_TXT2HTML: Can't open file %s \n", argv[2] ); return( 1 ); }

fseek( fp_in2, 0L, SEEK_END );
size_in2 = ftell( fp_in2 );
fseek( fp_in2, 0L, SEEK_SET );
printf( "Size of 2nd input file: %lu\n", size_in2 );

if( ( fp_out2 = fopen( "SKYFALL_TXT2HTML.txt", "wb+" ) ) == NULL )
{ printf( "SKYFALL_TXT2HTML: Can't create file 'SKYFALL_TXT2HTML.txt'.\n" ); return( 1 ); }

   fseek(fp_in2, 0, SEEK_END);
   Strnglen = ftell(fp_in2);
   fseek(fp_in2, 0, SEEK_SET);
   fread(Strng+2, 1, Strnglen, fp_in2);

Strng[0]=0xD; //CR
Strng[1]=0xA; //LF
Strnglen += 2;

// Replacing CR with NULL i.e. 13->0
//	for (ThunderwithL=0; ThunderwithL<Strnglen; ThunderwithL++)
//		if (Strng[ThunderwithL] == 13) {Strng[ThunderwithL] = 0; nlines++; nlines_A++;}

// The incoming wordlist has format: wordCRLFwordCRLF,...,wordCRLF
// The pool in memory has format: CRLFwordCRLFwordCRLF,...,wordCRLF
// So the dummy search should be for LFwordCR:

// Search ...

clocks1 = clock();
ReallyTraversed=0;
if (GulliverFlag==1)
printf( "\nDoing Search for each word with Railgun_Quadruplet_7Gulliver ...\n");
else if (GulliverFlag==2)
printf( "\nDoing Search for each word with Railgun_Hasherezade ...\n");
else
printf( "\nDoing Search for each word with Railgun_Quadruplet_7 ...\n");
//Search area is between Strng[0] .. Strng[n-1]

        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	wrdlen = 0;
	wrd[0]=0xA;
	wrdLOWER[0]=0xA;
        for( i = 0; i < size_in; i++ )
	{

                // ~~~~~~~~~~~~ Buffering fread [
                if (workKoffset == -1) {
                        if (i + 1024*128 < size_in) {
                                fread( &workK[0], 1, 1024*128, fp_in );
                                workKoffset = 0;
                                workbyte = workK[workKoffset];
                        } else 
                        fread( &workbyte, 1, 1, fp_in );
                } else {
                        workKoffset++;
                        workbyte = workK[workKoffset];
                        if (workKoffset == 1024*128 - 1) workKoffset = -1;
                }
                // ~~~~~~~~~~~~ Buffering fread ]

                if( isalpha( workbyte ) )
                {
                       if( wrdlen < MaxWrdLen )
                        { wrd[ 1+wrdlen ] = workbyte; wrdLOWER[ 1+wrdlen ] = tolower( workbyte );}
                        wrdlen++;
			// I DO NOT DUMP the bigger words ON PURPOSE!
                } else {
                       	if( wrdlen >= 1 && wrdlen <= MaxWrdLen ) {
			wrdLOWER[wrdlen+1]=0xD; // LFaCR

			Railgunhits=0;
			FunctionInvocations++; TotalLenghtOfPatterns += 1+wrdlen+1;
			if (GulliverFlag==1)
			FoundInPTR = Railgun_Quadruplet_7Gulliver(&Strng[0], wrdLOWER, Strnglen, 1+wrdlen+1);
			else if (GulliverFlag==2)
			FoundInPTR = Railgun_Hasherezade(&Strng[0], wrdLOWER, Strnglen, 1+wrdlen+1);
			else
			FoundInPTR = Railgun_Quadruplet_7(&Strng[0], wrdLOWER, Strnglen, 1+wrdlen+1);
			
			if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
			else ReallyTraversed=ReallyTraversed+Strnglen;

			if ((long)(FoundInPTR-&Strng[0])>=0) { fwrite(wrd+1, wrdlen, 1, fp_out2); }
			else { fwrite(BOLDl, strlen(BOLDl), 1, fp_out2); fwrite(wrd+1, wrdlen, 1, fp_out2); fwrite(BOLDr, strlen(BOLDr), 1, fp_out2); }

			}
			wrdlen = 0;
	                if ( workbyte == 34 ) { fprintf( fp_out2, "%s", UnacceptableInHTMLq[0] ); }
	                else if ( workbyte == '&' ) { fprintf( fp_out2, "%s", UnacceptableInHTMLa[0] ); }
        	        else if ( workbyte == '<' ) { fprintf( fp_out2, "%s", UnacceptableInHTMLl[0] ); }
	                else if ( workbyte == '>' ) { fprintf( fp_out2, "%s", UnacceptableInHTMLg[0] ); }
        	        else if ( workbyte == 0 ) { fprintf( fp_out2, "%s", UnacceptableInHTMLs[0] ); }
			else fprintf( fp_out2, "%c", workbyte );
		}


/*
                if ( workbyte < 'A' ) // Most characters are under alphabet - only one if
                {

                }
                //else if( workbyte >= 'A' &&  workbyte <= 'Z' )
                else if( workbyte <= 'Z' )
		{
                        //if( wrdlen < 31 )
                        if( wrdlen < LongestLineInclusive )
                        { wrd[ wrdlen ] = workbyte + 32 ; }
                        wrdlen++;
		}
                else if( workbyte >= 'a' &&  workbyte <= 'z' )
		{
                        //if( wrdlen < 31 )
                        if( wrdlen < LongestLineInclusive )
                        { wrd[ wrdlen ] = workbyte; }
                        wrdlen++;
		}
		else
                {

		}
*/
        } // i 'for'
        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
clocks2 = clock(); TotalRoughSearchTime += clocks2 - clocks1; TotalRoughSearchTime++;

// Caramba on one line they don't work!?
if (GulliverFlag==1)
printf( "Railgun_Quadruplet_7Gulliver performance: %lu+KB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10 );
else if (GulliverFlag==2)
printf( "Railgun_Hasherezade performance: %lu+KB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10 );
else
printf( "Railgun_Quadruplet_7 performance: %lu+KB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10 );

printf( "Average Pattern Length: %s\n", _ui64toaKAZEcomma(TotalLenghtOfPatterns/FunctionInvocations, llTOaDigits, 10));
printf( "Function Invocations: %s\n", _ui64toaKAZEcomma(FunctionInvocations, llTOaDigits, 10));
printf( "Function Inner-Loop Iterations: %s\n", _ui64toaKAZEcomma(GlobalI, llTOaDigits, 10));
printf( "Function Really Traversed: %sKB\n", _ui64toaKAZEcomma(ReallyTraversed>>10, llTOaDigits, 10));

free(Strng);
fclose(fp_in);fclose(fp_in2);fclose(fp_out2);
exit(1);
// SKYFALL_TXT2HTML.c ]

// strstr_SHORT-SHOWDOWN.c [

printf("strstr_SHORT-SHOWDOWN, revision Doublet_8Triplet_7Trident_7Hasherezade_vs_7Gulliver2_vs_7Elsiane_vs_7sun_vs_7_vs_7sunhorse_vs_7deuce_vs_BMF, written by Kaze.\n");
printf("Full credits to: R.S. Boyer, J.S. Moore, R.N. Horspool, D.M. Sunday.\n");
//printf("Note: There is something very inconsistent even wrong with all timings, still can't figure out the reason!\n");
//printf("Note: There is something very buggy with arrays(sometimes the entire function is not executed, but when they (3) are defined as global this bug vanishes?!) of Boyer_Moore_Flensburg, still can't figure out the reason!\n");
printf("Usage: strstr_SHORT-SHOWDOWN.exe [anystring] [anystring]\n");
printf("Example1(keyboard test): strstr_SHORT-SHOWDOWN.exe\n");
printf("Example2(DUMBO 8x2 test): strstr_SHORT-SHOWDOWN.exe go\n");
printf("Example3(6x2 and 52 tests): strstr_SHORT-SHOWDOWN.exe go go\n");
Pattern[0]=0x00;
Strng = (char *)malloc( 220*1024*1024 );
if( Strng == NULL )
{ puts( "strstr_SHORT-SHOWDOWN: Needed memory allocation denied!\n" ); return( 1 ); }
if( ( fp_inLINE = fopen( "OSHO.TXT", "rb" ) ) == NULL )
{ printf( "strstr_SHORT-SHOWDOWN: Can't open 'OSHO.TXT' file.\n" ); return( 1 ); }

   fseek(fp_inLINE, 0, SEEK_END);
   Strnglen = ftell(fp_inLINE);
   fseek(fp_inLINE, 0, SEEK_SET);
   fread(Strng, 1, Strnglen, fp_inLINE);

// Replacing CR with NULL i.e. 13->0
	for (ThunderwithL=0; ThunderwithL<Strnglen; ThunderwithL++)
		if (Strng[ThunderwithL] == 13) Strng[ThunderwithL] = 0;
ThunderwithL=0;ThunderwithR=0;

if (argc==2) goto LineBylinePfu;
if (argc!=1) goto SlowerButStronger;

if (argc==1) {
printf("\nLast 6 lines of 'OSHO.TXT' file 197 MB (206,908,949 bytes):\n");
printf("           If you have read through the preceding chapters you should have a pretty good idea on how to make\n");
printf("           use of the Osho Books on CD-ROM. What use you will put it to is up to you. It is the largest ever\n");
printf("           electronic repository of understanding and knowledge on meditation and its techniques. It is also\n");
printf("           much more, a complete, world view of the New Man and a new way of life. The purpose of this\n");
printf("           CD-ROM is to provide access to Osho's words, ideas and vision, and to make them available to as\n");
printf("           many people as possible.\n");

printf("\nLast 3 lines of 'hs_alt_HuRef_chr1.fa' file 212 MB (222,610,477 bytes):\n");
printf("AGATTTTAAAGATTTTCTTTTTTTTTGACATAGAATCTTATGGAGGCTGAGAAATAATTTTTTTTCTATT\n");
printf("TTATTCTTCAGCCCCAGGTGTTTGCTTTTGCAGATTCTTGAGCACATTGAGAGCCTCCAAGGCATGGAGT\n");
printf("GGGGTGCCTGAAGTT\n");

printf("\nInput Pattern(up to 19+2000 chars): "); gets(Pattern); // char * __cdecl gets(char *);
Patternlen = strlen(&Pattern[0]);
}


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7sun_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7sun_hits/Railgun_Quadruplet_7sun_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7sun performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7 performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7Trident_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7Trident_hits/Railgun_Quadruplet_7Trident_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7Trident performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= BNDM_32_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "BNDM_32_hits/BNDM_32_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "BNDM_32 performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7sunhorse_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7sunhorse_hits/Railgun_Quadruplet_7sunhorse_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7sunhorse performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7deuce_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7deuce performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7Elsiane_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7Elsiane_hits/Railgun_Quadruplet_7Elsiane_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7Elsiane performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7Gulliver_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7Gulliver_hits/Railgun_Quadruplet_7Gulliver_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Railgun_Quadruplet_7Hasherezade_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Railgun_Quadruplet_7Hasherezade_hits/Railgun_Quadruplet_7Hasherezade_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7Hasherezade performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]





// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= Boyer_Moore_Flensburg(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Bozan_hits/Bozan_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
//printf( "Bozan performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "Boyer_Moore_Flensburg_hits/Boyer_Moore_Flensburg_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Boyer_Moore_Flensburg performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]





// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

FoundInPTR= TW(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Two-Way_hits/Two-Way_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Two-Way performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

exit(0);




if (argc==1) {

goto zzzzzzzzzzzzzzzzz;

// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	FoundInPTR = TW(&Strng[0], &Pattern[0], Strnglen, Patternlen);
//printf("%s\n",FoundInPTR);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "TW_hits/TW_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "TW performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]




// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	dummy = HORSPOOL_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "BM_HORSPOOL_hits/BM_HORSPOOL_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "BM_HORSPOOL performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	FoundInPTR = Railgun_totalhits(&Strng[0], &Pattern[0], Strnglen, Patternlen);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_hits/Railgun_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

zzzzzzzzzzzzzzzzz:

// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
printf( "WARNING! The next line works with BMH only for pattern 4[+] long, otherwise (for 2 and 3) other searcher takes over!\n");
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_6pp_hits/Railgun_6pp_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_6pp performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
printf( "WARNING! The next line works with BMH only for pattern 4[+] long, otherwise (for 2 and 3) other searcher takes over!\n");
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	FoundInPTR = Railgun_Quadruplet_7deuce_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_Quadruplet_7deuce_hits/Railgun_Quadruplet_7deuce_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7deuce performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
printf( "WARNING! The next line works with BMH only for pattern 4[+] long, otherwise (for 2 and 3) other searcher takes over!\n");
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

	FoundInPTR = Railgun_Quadruplet_7_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);

}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7 performance: %luKB/clock\n", (Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]








} //if (argc==1) {


// NEW TEST APPROACH HERE - the old BMH loop of Railgun_Quadruplet_6pp is 67 bytes long whereas the new Railgun_Quadruplet_7mm has 61 i.e. under 64. [
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

printf( "\nNote: Executing the next two tests 256 times i.e. the search is for 256x8x2 patterns!\n");

// Benchmark: first-hit search with Railgun_Quadruplet_6pp for the 8x2 patterns held in DumboBox[i][0..1].
// ReallyTraversed accumulates, over ALL passes, the bytes counted as traversed per search:
// (offset of the hit + 1) when the offset is non-negative, otherwise the whole Strnglen.
// NOTE(review): the "offset < 0 means not found" test presumably relies on the searcher returning NULL
// on a miss; on targets where long is narrower than a pointer the truncated difference may be positive - confirm.
ReallyTraversed=0;
// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ... Not-Counting-Hits-Just-Returns-First-One\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<8); Bozan++) // 256 passes; the totals below are not divided by the pass count
{
    //Search area is between Strng[0] .. Strng[n-1]

    for (i = 0; i<8; i++)
    {
        // First pattern of the pair.
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_6pp(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        // Per-pattern figures are printed on the last pass only.
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            // The offset is a pointer difference, so it is cast to long and printed with %ld (was %d).
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", DumboBox[i][0], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", DumboBox[i][0], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }

        // Second pattern of the pair.
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_6pp(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", DumboBox[i][1], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", DumboBox[i][1], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }
    }
}
// Elapsed clocks for all 256 passes; the increment keeps the divisor below at 1 or more.
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The quotient is unsigned long long, so it is printed with %llu (was %lu).
printf( "Railgun_Quadruplet_6pp 8x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10);
printf( "ReallyTraversed: %s bytes\n", _ui64toaKAZEcomma(ReallyTraversed, llTOaDigits, 10));
// As one line: ]


// Benchmark: first-hit search with Railgun_Quadruplet_7 for the 8x2 patterns held in DumboBox[i][0..1].
// ReallyTraversed accumulates, over ALL passes, the bytes counted as traversed per search:
// (offset of the hit + 1) when the offset is non-negative, otherwise the whole Strnglen.
// NOTE(review): the "offset < 0 means not found" test presumably relies on the searcher returning NULL
// on a miss; on targets where long is narrower than a pointer the truncated difference may be positive - confirm.
ReallyTraversed=0;
// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ... Not-Counting-Hits-Just-Returns-First-One\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<8); Bozan++) // 256 passes; the totals below are not divided by the pass count
{
    //Search area is between Strng[0] .. Strng[n-1]

    for (i = 0; i<8; i++)
    {
        // First pattern of the pair.
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        // Per-pattern figures are printed on the last pass only.
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            // The offset is a pointer difference, so it is cast to long and printed with %ld (was %d).
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][0], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][0], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }

        // Second pattern of the pair.
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][1], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][1], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }
    }
}
// Elapsed clocks for all 256 passes; the increment keeps the divisor below at 1 or more.
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The quotient is unsigned long long, so it is printed with %llu (was %lu).
printf( "Railgun_Quadruplet_7 8x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10);
printf( "ReallyTraversed: %s bytes\n", _ui64toaKAZEcomma(ReallyTraversed, llTOaDigits, 10));
// As one line: ]

// ====================================================================================================================

printf( "\nNote: Executing the next two tests 256 times i.e. the search is for 256x52 patterns, all utilizing BMH!\n");


// Benchmark: first-hit search with Railgun_Quadruplet_6pp for the 52 patterns held in Dumbino[].
// ReallyTraversed accumulates, over ALL passes, the bytes counted as traversed per search:
// (offset of the hit + 1) when the offset is non-negative, otherwise the whole Strnglen.
// NOTE(review): the "offset < 0 means not found" test presumably relies on the searcher returning NULL
// on a miss; on targets where long is narrower than a pointer the truncated difference may be positive - confirm.
ReallyTraversed=0;
// As one line: [
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ... Not-Counting-Hits-Just-Returns-First-One\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<8); Bozan++) // 256 passes; the totals below are not divided by the pass count
{
    //Search area is between Strng[0] .. Strng[n-1]

    for (i = 0; i<52; i++)
    {
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_6pp(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        // Per-pattern figures are printed on the last pass only.
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            // The offset is a pointer difference, so it is cast to long and printed with %ld (was %d).
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", Dumbino[i], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_6pp performance: %luKB/clock\n", Dumbino[i], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }
    }
}
// Elapsed clocks for all 256 passes; the increment keeps the divisor below at 1 or more.
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The quotient is unsigned long long, so it is printed with %llu (was %lu).
printf( "Railgun_Quadruplet_6pp 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10);
printf( "TotalRoughSearchTime: %s clocks\n", _ui64toaKAZEcomma(TotalRoughSearchTime, llTOaDigits, 10));
printf( "ReallyTraversed: %s bytes\n", _ui64toaKAZEcomma(ReallyTraversed, llTOaDigits, 10));
// As one line: ]



// Benchmark: first-hit search with Railgun_Quadruplet_7 for the 52 patterns held in Dumbino[].
// ReallyTraversed accumulates, over ALL passes, the bytes counted as traversed per search:
// (offset of the hit + 1) when the offset is non-negative, otherwise the whole Strnglen.
// NOTE(review): the "offset < 0 means not found" test presumably relies on the searcher returning NULL
// on a miss; on targets where long is narrower than a pointer the truncated difference may be positive - confirm.
ReallyTraversed=0;
// As one line: [
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ... Not-Counting-Hits-Just-Returns-First-One\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<8); Bozan++) // 256 passes; the totals below are not divided by the pass count
{
    //Search area is between Strng[0] .. Strng[n-1]

    for (i = 0; i<52; i++)
    {
        Railgunhits=0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        if ((long)(FoundInPTR-&Strng[0])>=0) ReallyTraversed=ReallyTraversed+(long)(FoundInPTR-&Strng[0])+1;
        else ReallyTraversed=ReallyTraversed+Strnglen;
        clocks4 = clock();
        // Per-pattern figures are printed on the last pass only.
        if (Bozan == (1<<8)-1) {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
            // The offset is a pointer difference, so it is cast to long and printed with %ld (was %d).
            if ((long)(FoundInPTR-&Strng[0])>=0) printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", Dumbino[i], (long)(FoundInPTR-&Strng[0]), ((long)(FoundInPTR-&Strng[0])/((long)(TotalRoughSearchTime)))>>10);
            else printf( "Found ('%s') at %ld position, Railgun_Quadruplet_7 performance: %luKB/clock\n", Dumbino[i], (long)(FoundInPTR-&Strng[0]), (Strnglen/((long)(TotalRoughSearchTime)))>>10);
        }
    }
}
// Elapsed clocks for all 256 passes; the increment keeps the divisor below at 1 or more.
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The quotient is unsigned long long, so it is printed with %llu (was %lu).
printf( "Railgun_Quadruplet_7 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)ReallyTraversed/((long)(TotalRoughSearchTime)))>>10);
printf( "TotalRoughSearchTime: %s clocks\n", _ui64toaKAZEcomma(TotalRoughSearchTime, llTOaDigits, 10));
printf( "ReallyTraversed: %s bytes\n", _ui64toaKAZEcomma(ReallyTraversed, llTOaDigits, 10));
// As one line: ]


// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// NEW TEST APPROACH HERE - the old BMH loop of Railgun_Quadruplet_6pp is 67 bytes long whereas the new Railgun_Quadruplet_7mm has 61 i.e. under 64. ]





// The unconditional goto below jumps straight to the QuickToHits label, so every benchmark
// written between this point and that label is currently bypassed.
SlowerButStronger:
goto QuickToHits;



// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	dummy = HORSPOOL_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), BM_HORSPOOL performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	dummy = HORSPOOL_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), BM_HORSPOOL performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "BM_HORSPOOL_hits/BM_HORSPOOL_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "BM_HORSPOOL 8x2 i.e. average performance: %luKB/clock\n", ((unsigned long long)8*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]


/*
// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = TW(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), TW performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = TW(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), TW performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Two-way_hits/Two-way_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "TW 8x2 i.e. average performance: %luKB/clock\n", ((unsigned long long)8*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]
*/




// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_6pp performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_6pp performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Railgun_6pp_hits/Railgun_6pp_clocks: %lu/%lu\n", Railgunhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_6pp 8x2 i.e. average performance: %luKB/clock\n", ((unsigned long long)8*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]



QuickToHits:






GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7sun_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7sun performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7sun_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7sun performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7sun 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7sun 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7 performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);






GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = BNDM_32_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), BNDM_32 performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = BNDM_32_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), BNDM_32 performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "BNDM_32 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "BNDM_32 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


















GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7sunhorse_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7sunhorse performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7sunhorse_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7sunhorse performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7sunhorse 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7sunhorse 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7deuce_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7deuce performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7deuce_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7deuce performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7deuce 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7deuce 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7Elsiane_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7Elsiane performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7Elsiane_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7Elsiane performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7Elsiane 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Elsiane 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Boyer_Moore_Flensburg(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Boyer_Moore_Flensburg performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Boyer_Moore_Flensburg(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Boyer_Moore_Flensburg performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Boyer_Moore_Flensburg 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Boyer_Moore_Flensburg 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);




GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7Gulliver_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7Gulliver_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7Gulliver 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Gulliver 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 6x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0+2; i<8; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Brute_Force_Dummy(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Brute_Force_Dummy performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Brute_Force_Dummy(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Brute_Force_Dummy performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 16 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass. The loop above runs i = 2..7, i.e. 6 rows x 2 patterns, so one pass
// is counted as 6*2*Strnglen bytes (was 8*2, which overstated the figure by a third); the elapsed clocks
// are divided by the 16 passes (>>4). The result is unsigned long long, hence %llu (was %lu).
printf( "Brute_Force_Dummy 6x2 i.e. average performance: %lluKB/clock\n", ((unsigned long long)6*2*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Brute_Force_Dummy 6x2 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);




// [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[


GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<6); Bozan++) // 64 times, at end >>6
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0; i<25; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7sun_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
clocks4 = clock();
if (Bozan == (1<<6)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7sun performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 64 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass: 25 patterns, each counted as Strnglen bytes, with the elapsed clocks
// divided by the 64 passes (>>6). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7sun 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7sun 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


GlobalSP = 0;
GlobalI = 0;

// As one line: [
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<6); Bozan++) // 64 times, at end >>6
{
//Search area is between Strng[0] .. Strng[n-1]

for (i = 0; i<25; i++)
   {
Railgunhits=0;
clocks3 = clock();
	FoundInPTR = Railgun_Quadruplet_7Trident_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
clocks4 = clock();
if (Bozan == (1<<6)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
                         printf( "Found ('%s') %d time(s), Railgun_Quadruplet_7Trident performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
                       }
   }
}
// Elapsed clocks for the whole run of 64 passes (biased by one).
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// Average throughput of one pass: 25 patterns, each counted as Strnglen bytes, with the elapsed clocks
// divided by the 64 passes (>>6). The result is unsigned long long, hence %llu (was %lu).
printf( "Railgun_Quadruplet_7Trident 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Trident 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark BNDM_32_count_hits: each of the 25 BULLY12to48 patterns is searched in the
// whole of Strng (treated as one line); the batch is repeated 64 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<6); Bozan++) // 64 passes; the total time is scaled back with >>6 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 25; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = BNDM_32_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
        clocks4 = clock();
        if (Bozan == (1<<6)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), BNDM performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "BNDM 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "BNDM 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



// Benchmark Railgun_Quadruplet_7Elsiane_count_hits: each of the 25 BULLY12to48 patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 64 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<6); Bozan++) // 64 passes; the total time is scaled back with >>6 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 25; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7Elsiane_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
        clocks4 = clock();
        if (Bozan == (1<<6)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7Elsiane performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7Elsiane 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Elsiane 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark Railgun_Quadruplet_7Gulliver_count_hits: each of the 25 BULLY12to48 patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 64 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<6); Bozan++) // 64 passes; the total time is scaled back with >>6 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 25; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7Gulliver_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
        clocks4 = clock();
        if (Bozan == (1<<6)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7Gulliver 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Gulliver 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark Railgun_Quadruplet_7Hasherezade_count_hits: each of the 25 BULLY12to48 patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 64 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 25 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<6); Bozan++) // 64 passes; the total time is scaled back with >>6 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 25; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7Hasherezade_count_hits(&Strng[0], BULLY12to48[i], Strnglen, strlen(BULLY12to48[i]));
        clocks4 = clock();
        if (Bozan == (1<<6)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7Hasherezade performance: %luKB/clock\n", BULLY12to48[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7Hasherezade 25 i.e. average performance: %lluKB/clock\n", ((unsigned long long)25*Strnglen/(1+((long)(TotalRoughSearchTime)>>6)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Hasherezade 25 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



// ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]




















// Benchmark Railgun_Quadruplet_7sun_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7sun_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7sun performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7sun 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7sun 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark Railgun_Quadruplet_7_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7 performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);



// Benchmark Railgun_Quadruplet_7sunhorse_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7sunhorse_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7sunhorse performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7sunhorse 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7sunhorse 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark Railgun_Quadruplet_7deuce_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7deuce_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7deuce performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7deuce 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7deuce 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);


// Benchmark Railgun_Quadruplet_7Elsiane_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7Elsiane_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7Elsiane performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7Elsiane 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Elsiane 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);




// Benchmark Boyer_Moore_Flensburg: each of the 52 Dumbino patterns is searched in the
// whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Boyer_Moore_Flensburg(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            // NOTE(review): presumably Boyer_Moore_Flensburg updates Railgunhits itself -- verify.
            printf( "Found ('%s') %ld time(s), Boyer_Moore_Flensburg performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Boyer_Moore_Flensburg 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Boyer_Moore_Flensburg 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);





// Benchmark Railgun_Quadruplet_7Gulliver_count_hits: each of the 52 Dumbino patterns is
// searched in the whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Railgun_Quadruplet_7Gulliver_count_hits(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            printf( "Found ('%s') %ld time(s), Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Railgun_Quadruplet_7Gulliver 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Railgun_Quadruplet_7Gulliver 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);





// Benchmark Brute_Force_Dummy: each of the 52 Dumbino patterns is searched in the
// whole of Strng (treated as one line); the batch is repeated 16 times.
// The skip/iteration totals printed at the end are restarted from zero here.
GlobalSP = 0;
GlobalI = 0;

// As one line: [
// NOTE(review): %d presumes Strnglen is an int -- confirm against its declaration.
printf( "\nDoing Search for 52 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan = 0; Bozan < (1<<4); Bozan++) // 16 passes; the total time is scaled back with >>4 below
{
    // Search area is between Strng[0] .. Strng[n-1]
    for (i = 0; i < 52; i++)
    {
        Railgunhits = 0;
        clocks3 = clock();
        FoundInPTR = Brute_Force_Dummy(&Strng[0], Dumbino[i], Strnglen, strlen(Dumbino[i]));
        clocks4 = clock();
        if (Bozan == (1<<4)-1) // per-pattern figures are printed on the last pass only
        {
            TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++; // +1 keeps the divisor below non-zero
            // Railgunhits is a long (%ld); the rate is cast so that it really is the unsigned long %lu expects.
            // NOTE(review): presumably Brute_Force_Dummy updates Railgunhits itself -- verify.
            printf( "Found ('%s') %ld time(s), Brute_Force_Dummy performance: %luKB/clock\n", Dumbino[i], Railgunhits, (unsigned long)((Strnglen/((long)(TotalRoughSearchTime)))>>10));
        }
    }
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
// The argument is an unsigned long long expression, so it is printed with %llu (not %lu).
printf( "Brute_Force_Dummy 52 i.e. average performance: %lluKB/clock\n", ((unsigned long long)52*Strnglen/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
// As one line: ]

printf( "Brute_Force_Dummy 52 total Skip-Performance/Iterations: %llu/%llu\n", GlobalSP, GlobalI);











//exit(0);







if (argc==1) {

printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) line-by-line ...\n", Patternlen, Strnglen);

// 7[
// Line-by-line timing of strstr_Microsoft: 16 identical passes over Strng, one call per line.
// NOTE(review): the call gets no line length, so it presumably scans on to the terminating NUL -- confirm.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = strstr_Microsoft(&Strng[ThunderwithL], &Pattern[0]);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            strstrMicrosofthits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("strstr_Microsoft_hits/strstr_Microsoft_clocks: %lu/%lu\n", strstrMicrosofthits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("strstr_Microsoft performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 7]

// 8[
// Line-by-line timing of strstr_GNU_C_Library: 16 identical passes over Strng, one call per line.
// NOTE(review): the call gets no line length, so it presumably scans on to the terminating NUL -- confirm.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = strstr_GNU_C_Library(&Strng[ThunderwithL], &Pattern[0]);
        if (FoundInPTR == 0)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            strstrGNUCLibraryhits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: %lu/%lu\n", strstrGNUCLibraryhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("strstr_GNU_C_Library performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 8]

// 9[
// Line-by-line timing of Railgun_Doublet: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = Railgun_Doublet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            Karp_Rabin_Kaze_4_OCTETShits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Railgun_Doublet_hits/Railgun_Doublet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Railgun_Doublet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 9]

// +[
// Line-by-line timing of Railgun_Quadruplet: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = Railgun_Quadruplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Railgun_Quadruplet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// +]


// Z[
// Line-by-line timing of Railgun_Quadruplet_7: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = Railgun_Quadruplet_7(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            Railgun_Quadruplet_6pp_GO++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: %lu/%lu\n", Railgun_Quadruplet_6pp_GO>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Railgun_Quadruplet_7 performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]



// Z[
// Line-by-line timing of Railgun_Quadruplet_8Triplet: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = Railgun_Quadruplet_8Triplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            Railgun_Quadruplet_88++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: %lu/%lu\n", Railgun_Quadruplet_88>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Railgun_Quadruplet_8Triplet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]



// Z[
// Line-by-line timing of BNDM_32: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = BNDM_32(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            BNDM32++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("BNDM_32_hits/BNDM_32_clocks: %lu/%lu\n", BNDM32>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("BNDM_32 performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]







// 6[
// Line-by-line timing of KarpRabinKaze_BOOSTED: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = KarpRabinKaze_BOOSTED(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            KarpRabinKaze_BOOSTEDhits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("KarpRabinKaze_BOOSTED_hits/KarpRabinKaze_BOOSTED_clocks: %lu/%lu\n", KarpRabinKaze_BOOSTEDhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("KarpRabinKaze_BOOSTED performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 6]

// 2[
// Line-by-line timing of KarpRabinKaze: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = KarpRabinKaze(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            KarpRabinKazehits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("KarpRabinKaze_hits/KarpRabinKaze_clocks: %lu/%lu\n", KarpRabinKazehits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("KarpRabinKaze performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 2]

// 3[
// Line-by-line timing of Karp_Rabin: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundIn = Karp_Rabin(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundIn == -1)
        {
            // -1 is treated as a miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: the returned value (presumably the match offset -- confirm) is added as the traversed length.
            KarpRabinhits++;
            StrnglenTRAVERSED += FoundIn;
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Karp_Rabin_hits/Karp_Rabin_clocks: %lu/%lu\n", KarpRabinhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Karp_Rabin performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 3]


// 4[
// Line-by-line timing of HORSPOOL: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundIn = HORSPOOL(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundIn == -1)
        {
            // -1 is treated as a miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: the returned value (presumably the match offset -- confirm) is added as the traversed length.
            HORSPOOLhits++;
            StrnglenTRAVERSED += FoundIn;
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Boyer-Moore-Horspool_hits/Boyer-Moore-Horspool_clocks: %lu/%lu\n", HORSPOOLhits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Boyer-Moore-Horspool performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 4]

// 5[
// Line-by-line timing of Boyer_Moore_Horspool_Kaze: 16 identical passes over Strng, one call per line.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundIn = Boyer_Moore_Horspool_Kaze(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
        if (FoundIn == -1)
        {
            // -1 is treated as a miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: the returned value (presumably the match offset -- confirm) is added as the traversed length.
            HORSPOOL_Kazehits++;
            StrnglenTRAVERSED += FoundIn;
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
// The hit counter is not reset between passes in this block, hence the >>4.
printf("Boyer_Moore_Horspool_Kaze_hits/Boyer_Moore_Horspool_Kaze_clocks: %lu/%lu\n", HORSPOOL_Kazehits>>4, 1+((long)(TotalRoughSearchTime)>>4));
printf("Boyer_Moore_Horspool_Kaze performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 5]


LineBylinePfu:

// DUMBO ... [
printf( "\nDUMBO 8x2 ...\n");
for (i = 0; i<8; i++)
   {
for (j = 0; j<2; j++)
   {

strcpy (Pattern, DumboBox[i][j]);
Patternlen = strlen(&Pattern[0]);

printf( "\nSearching for Pattern('%s',%dbytes) into String(%dbytes) line-by-line ...\n\n", Pattern, Patternlen, Strnglen);


// 7[
// DUMBO line-by-line timing of strstr_Microsoft for the current Pattern: 16 identical passes over Strng.
// NOTE(review): the call gets no line length, so it presumably scans on to the terminating NUL -- confirm.
clocks1 = clock();
for (Bozan = 0; Bozan < 16; Bozan++) // the elapsed clocks are divided by 16 (>>4) when reported
{
    strstrMicrosofthits = 0; // restarted every pass, so the printed count is that of the final pass
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = strstr_Microsoft(&Strng[ThunderwithL], &Pattern[0]);
        if (FoundInPTR == NULL)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            strstrMicrosofthits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 15) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
printf("strstr_Microsoft_hits/strstr_Microsoft_clocks: %lu/%lu\n", strstrMicrosofthits, 1+((long)(TotalRoughSearchTime)>>4));
printf("strstr_Microsoft performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 7]

// 8[
// DUMBO line-by-line timing of strstr_GNU_C_Library for the current Pattern: 32 identical passes over Strng.
// NOTE(review): the call gets no line length, so it presumably scans on to the terminating NUL -- confirm.
clocks1 = clock();
for (Bozan = 0; Bozan < 32; Bozan++) // 32 passes here; the elapsed clocks are divided by 32 (>>5) when reported
{
    strstrGNUCLibraryhits = 0; // restarted every pass, so the printed count is that of the final pass
    // Search area is between Strng[0] .. Strng[n-1]
    StrnglenTRAVERSED = 0; // counts only the chars really traversed during this pass
    ThunderwithL = 0;
    ThunderwithR = 0;
    do
    {
        // Move the right edge to the next byte 10 (line feed), or to index Strnglen-1 at the latest.
        while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1)
            ThunderwithR++;
        FoundInPTR = strstr_GNU_C_Library(&Strng[ThunderwithL], &Pattern[0]);
        if (FoundInPTR == 0)
        {
            // Miss: the whole line counts as traversed.
            StrnglenTRAVERSED += (ThunderwithR - ThunderwithL);
        }
        else
        {
            // Hit: only the bytes in front of the match count as traversed.
            strstrGNUCLibraryhits++;
            StrnglenTRAVERSED += (FoundInPTR - &Strng[ThunderwithL]);
        }
        LinesEncountered++;
        ThunderwithL = ++ThunderwithR; // the next line begins right after the line feed
    } while (ThunderwithR < Strnglen-1);
    // Only the line count of the final pass survives.
    if (Bozan != 31) LinesEncountered = 0;
}
clocks2 = clock();
TotalRoughSearchTime = clocks2 - clocks1;
TotalRoughSearchTime++;
printf("LinesEncountered: %lu\n", LinesEncountered);
printf("strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: %lu/%lu\n", strstrGNUCLibraryhits, 1+((long)(TotalRoughSearchTime)>>5));
printf("strstr_GNU_C_Library performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>5)))>>10);
printf("StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 8]

// 9[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<5); Bozan++) // 32 times, at end >>5
{
Karp_Rabin_Kaze_4_OCTETShits=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Doublet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<5)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Doublet_hits/Railgun_Doublet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits, 1+((long)(TotalRoughSearchTime)>>5));
printf( "Railgun_Doublet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>5)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 9]

// +[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Quadruplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// +]

// Z[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Quadruplet_6pp(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_6pp_hits/Railgun_Quadruplet_6pp_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_6pp performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]


// M[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Quadruplet_7(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_7_hits/Railgun_Quadruplet_7_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7 performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// M]


// M[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<5); Bozan++) // 32 times, at end >>5
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Quadruplet_8Triplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<5)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_8Triplet_hits/Railgun_Quadruplet_8Triplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>5));
printf( "Railgun_Quadruplet_8Triplet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>5)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// M]

// M[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<5); Bozan++) // 32 times, at end >>5
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Mischa_8Triplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<5)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Mischa_8Triplet_hits/Railgun_Mischa_8Triplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>5));
printf( "Railgun_Mischa_8Triplet performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>5)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// M]

// M[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = BNDM_32(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "BNDM_32_hits/BNDM_32_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>4));
printf( "BNDM_32 performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// M]


// M[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
	for (;;)
	{
	while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
	FoundInPTR = Railgun_Quadruplet_7Gulliver(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
	if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
	LinesEncountered++;
	ThunderwithR++;	ThunderwithL=ThunderwithR;
	if (ThunderwithR >= Strnglen-1) break;
	}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_7Gulliver_hits/Railgun_Quadruplet_7Gulliver_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, 1+((long)(TotalRoughSearchTime)>>4));
printf( "Railgun_Quadruplet_7Gulliver performance: %luKB/clock\n", (StrnglenTRAVERSED/(1+((long)(TotalRoughSearchTime)>>4)))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// M]


   }
   }
// DUMBO ... ]

} //if (argc==1) {

//exit(1); // Comment it to get on!

return(0);
  }


/*

An add-on from 2012-Jan-20:
[
Railgun_Quadruplet_7 vs Railgun_Quadruplet_7deuce vs Boyer_Moore vs Brute_Force: a face-off

Okay, nothing new, only a must-see benchmark against the original Boyer-Moore implementation.
Two tests were done: the first ('6x2') searches for 12 patterns and the second ('52') for 52 patterns (all occurrences, i.e. counting all hits) in a 206,908,949-byte English text.

Speed-Performance (measured in KB/clock, bigger-the-better) Summary:
Function                   6x2 test
Railgun_Quadruplet_7       2,039KB/clock / 137,568% / 006,454,416,320  BONBON
Boyer_Moore_Flensburg      1,239KB/clock / 139,168% / 006,428,115,568  Slow
Railgun_Quadruplet_7deuce  1,120KB/clock / 155,744% / 005,995,597,776  Slower
Brute_Force_Dummy          0,341KB/clock / 019,200% / 039,726,516,640  Slowest

Speed-Performance (measured in KB/clock, bigger-the-better) Summary:
Function                   52 test
Railgun_Quadruplet_7       1,645KB/clock / 708,768% / 025,368,049,712  BONBON
Boyer_Moore_Flensburg      0,987KB/clock / 720,272% / 025,227,312,176  Slow
Railgun_Quadruplet_7deuce  0,919KB/clock / 864,048% / 023,292,952,992  Slower
Brute_Force_Dummy          0,277KB/clock / 083,200% / 172,148,232,496  Slowest

Skip-Performance (measured in %, bigger-the-better) Summary:
Function                   6x2 test
Railgun_Quadruplet_7deuce  1,120KB/clock / 155,744% / 005,995,597,776  BONBON
Boyer_Moore_Flensburg      1,239KB/clock / 139,168% / 006,428,115,568  loopy
Railgun_Quadruplet_7       2,039KB/clock / 137,568% / 006,454,416,320  loopier
Brute_Force_Dummy          0,341KB/clock / 019,200% / 039,726,516,640  loopiest 

Skip-Performance (measured in %, bigger-the-better) Summary:
Function                   52 test
Railgun_Quadruplet_7deuce  0,919KB/clock / 864,048% / 023,292,952,992  BONBON
Boyer_Moore_Flensburg      0,987KB/clock / 720,272% / 025,227,312,176  loopy
Railgun_Quadruplet_7       1,645KB/clock / 708,768% / 025,368,049,712  loopier
Brute_Force_Dummy          0,277KB/clock / 083,200% / 172,148,232,496  loopiest 

A very easy-on-the-eyes/mind article is at http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/bm.htm
Here I say 'Triple THANKS' to Mr. FH Flensburg; I used your code (after a straightforward port).
I see that the Sunday algorithm has potential; soon I will play with it.
]

*/

