// GRAFFITH wrapper over BSC.

/*-----------------------------------------------------------*/
/* Block Sorting, Lossless Data Compression Library.         */
/* Block Sorting Compressor                                  */
/*-----------------------------------------------------------*/

/*--

This file is a part of bsc and/or libbsc, a program and a library for
lossless, block-sorting data compression.

Copyright (c) 2009-2010 Ilya Grebnov <ilya.grebnov@libbsc.com>

The bsc and libbsc is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The bsc and libbsc is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the bsc and libbsc. If not, see http://www.gnu.org/licenses/.

Please see the files COPYING and COPYING.LIB for full copyright information.

See also the bsc and libbsc web site:
  http://libbsc.com/ for more information.

--*/

#define _CRT_SECURE_NO_WARNINGS

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>

#ifdef _OPENMP
  #include <omp.h>
#endif

#include <math.h>
#include <memory.h>

#include "../libbsc/libbsc.h"
#include "../libbsc/filters.h"

// Switch to 1-byte packing so that the on-disk structure declared below has no
// padding between its members. The matching pop appears further down the file.
#pragma pack(push, 1)

// Pseudo value for paramSortingContexts: when the parameter equals this value,
// Compression() calls bsc_detect_contextsorder() for every block instead of
// using a fixed contexts order.
#define LIBBSC_CONTEXTS_AUTODETECT   3

// Four-byte archive signature. Compression() writes it at the very start of the
// output file; Decompression() reads the first four bytes of its input and
// rejects the file when they differ.
unsigned char bscFileSign[4] = {'b', 's', 'c', 0x23};

// Per-block header stored in the archive immediately before the block's
// compressed bytes. It is written and read as a raw memory image with
// fwrite()/fread(), so its layout is part of the file format.
typedef struct BSC_BLOCK_HEADER
{
    // Position in the original (uncompressed) file at which this block starts;
    // Decompression() seeks to it before writing the decoded block.
    long long   blockOffset;
    // Record size used for reordering; 1 means the block was not reordered.
    // Decompression() rejects values below 1.
    char        recordSize;
    // LIBBSC_CONTEXTS_FOLLOWING or LIBBSC_CONTEXTS_PRECEDING; any other value
    // is rejected by Decompression().
    char        sortingContexts;
} BSC_BLOCK_HEADER;


// Size in bytes of one compression block. Compression() lowers it to the input
// file size when the file is smaller than one block.
int paramBlockSize                = 3 * 1024 * 1024; // Kaze: this minimalistic value allows 24 threads to work simultaneously for my 3x24+MB chunks, also to utilize fully CPUs with 4 cores/threads and 12MB L3.
// Kaze [
// Statistics accumulators. They are not referenced in the visible part of this
// file; presumably they are updated by the driver code further down.
long NumberOfFilesToDecompress = 0;
    double TotalDecompressTime = 0;
    double TotalDecompressSize = 0;
    double TotalSearchRailgunTime = 0;
    double TotalSearchRailgunSize = 0;
    double TotalSearchBlunderbussTime = 0;
    double TotalSearchBlunderbussSize = 0;
// Capacity limit of the in-memory decode buffer. Decompression() refuses to copy
// a decoded block whose end (blockOffset + dataSize) exceeds this value.
// It is computed once from the initial paramBlockSize (3 MiB), which gives
// 4 * (32 * 12 MiB + 1) = 1,610,612,740; later changes to paramBlockSize do not
// affect it.
const int GRFFTH_DE_buffer_MAXSIZE= 4*(32 * (paramBlockSize*4) + 1); // Kaze: 32 threads * (3*4)MB = 384MB; *4 to allow 1+GB
// Running total of decoded bytes; Decompression() adds each block's dataSize.
unsigned long GRFFTH_DE_buffer_SIZE;
// Destination of the in-memory decode: Decompression() copies each decoded
// block to GRFFTH_DE_buffer_POINTER + blockOffset. The allocation is not
// visible in this part of the file.
char *GRFFTH_DE_buffer_POINTER;
// fp_outLOG receives the matching lines written by Blunderbuss(). The other
// two streams are not referenced in the visible part of this file.
      FILE *fp_in1, *fp_in2, *fp_outLOG;
// Scratch variables; not referenced in the visible part of this file.
      unsigned long size_in1, size_in2, k1, k2, LINE10len1, LINE10len2, Graphein;
      char LINE10_1[257]; // 000..255, 256 = 0
      char LINE10_2[257]; // 000..255, 256 = 0
      char workbyte;
// Hit counters; not referenced in the visible part of this file.
unsigned long Railgunhits;
unsigned long WILDgunhits;
unsigned long TotalRailgunhits;
unsigned long TotalWILDgunhits;
#define ASIZE 256
char *FoundInPTR;
// The seven single-character wildcards followed by the terminating NUL.
// Only referenced from commented-out code in Blunderbuss() in the visible part.
char TAGfree[8] = "*@#^$|%";
int PatternLEN;

/*
 * Fallback definitions of NULL, TRUE/FALSE, true/false and the `boolean` type.
 *
 * NULL is only defined here when no header has provided it yet: the standard
 * headers included above already define it, and redefining a macro with a
 * different replacement list is a diagnosable error.
 *
 * `true` and `false` are keywords in C++ and must not be #define'd there, so
 * the macro fallbacks are limited to C translation units that have not pulled
 * in <stdbool.h>.
 */
#ifndef NULL
  #ifdef __cplusplus
    #define NULL 0
  #else
    #define NULL ((void*)0)
  #endif
#endif

#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

#ifndef __cplusplus
  #ifndef false
  #define false 0
  #endif
  #ifndef true
  #define true 1
  #endif
#endif

/* One-byte truth value returned by the wildcard matching routines. */
typedef unsigned char boolean;

// ASCII-only case conversion for the letters A-Z / a-z; every other value is
// returned unchanged. These are macros, so the argument is evaluated more than
// once: do not pass expressions with side effects.
#define KAZE_tolower(c) ( (((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c) )
#define KAZE_toupper(c) ( (((c) >= 'a') && ((c) <= 'z')) ? ((c) - 'a' + 'A') : (c) )

// Lengths of the current pattern and of the current line. They are set by
// CompareWildCardWithName() before each match and read by the recursive
// matcher, which therefore does not have to recompute them at every level.
long maskGLOBALlen; // for speed up ((12415s - 755s)/755)*100% = 1544%
long nameGLOBALlen; 

// Number of CompareWildCardWithName() calls made so far.
long VIVA_IgorPavlov_invocations_global_counter = 0;
// WildGLOBALhits: number of successful wildcard matches.
// TotalLinesENC: number of lines visited by Blunderbuss().
long WildGLOBALhits = 0, TotalLinesENC = 0;


/*
 * memchrKAZE - find the first byte in a buffer equal to a given byte.
 *
 *   buf  start of the buffer to scan
 *   chr  pointer to the byte value to look for (only *chr is read)
 *   cnt  number of bytes of buf to examine
 *
 * Returns a pointer to the first matching byte, or NULL when none of the
 * first cnt bytes matches. Nothing is dereferenced when cnt is zero.
 */
void * memchrKAZE (
        const void * buf,
        const void * chr,
        unsigned long cnt
        )
{
        const unsigned char *cursor = (const unsigned char *)buf;
        unsigned long remaining;

        for (remaining = cnt; remaining != 0; --remaining, ++cursor) {
                if (*cursor == *(const unsigned char *)chr)
                        return (void *)cursor;
        }

        return NULL;
}
//Exit:
//       returns a pointer to the first occurrence of the byte *chr in buf
//       returns NULL if that byte is not found in the first cnt bytes


/*
 * KAZE_strlen - length of a NUL-terminated string.
 *
 *   str  string to measure; must be NUL-terminated
 *
 * Returns the number of characters before the terminating NUL. The distance
 * is narrowed to int before being returned.
 */
long KAZE_strlen (
        const char * str
        )
{
        const char *end = str;

        while (*end != '\0')
                ++end;

        return (int)(end - str);
}
//_KAZE_strlen PROC NEAR
//; Line 225: const char *eos = str;
//        mov     ecx, DWORD PTR _str$[esp-4]
//        mov     eax, ecx
//$L725:
//; Line 227: while( *eos++ ) ;
//        mov     dl, BYTE PTR [eax]
//        inc     eax
//        test    dl, dl
//        jne     SHORT $L725
//; Line 229: return( (int)(eos - str - 1) );
//        sub     eax, ecx
//        dec     eax
//; Line 230
//        ret     0
//_KAZE_strlen ENDP


/*
 * KAZE_strlenLF - length of a line terminated by LF (byte value 10).
 *
 *   str  start of the line; an LF byte must follow somewhere, because the
 *        scan stops only at LF (a NUL byte does NOT stop it)
 *
 * Returns the number of bytes before the first LF. For CRLF-terminated text
 * the CR is counted as part of the line. The distance is narrowed to int
 * before being returned.
 */
long KAZE_strlenLF (
        const char * str
        )
{
        const char lineFeed = 10;
        const char *end = str;

        while (*end != lineFeed)
                ++end;

        return (int)(end - str);
}

// GRAFFITH.exe e OSHO.TXT OSHO.TXT.bsc -m0fb3  - decompression: 13s(2 cores) 45MB
// GRAFFITH.exe e OSHO.TXT OSHO.TXT.bsc -m3fb12 - decompression: 20s(2 cores) 34MB

//       wildcard '*' any character(s) or empty,
//       wildcard '@'/'#' any character {or empty}/{and not empty},
//       wildcard '^'/'$' any ALPHA character {or empty}/{and not empty},
//       wildcard '|'/'%' any NON-ALPHA character {or empty}/{and not empty},
//       wildcard '+'/'~' any WORD {or empty}/{and not empty}.

// wildcard '*' any character(s) or empty,
// wildcard '@' any character or empty,
// wildcard '#' any character and not empty,
// wildcard '$' any ALPHA character and not empty,
// wildcard '%' any NON-ALPHA character and not empty.
// Note: Due to different line endings(CRLF in Windows; LF in UNIX)
//       you must add a '@' wildcard in place of CR: for example in
//       case of searching for '*.pdf' write '*.pdf@'.
// Pattern example: *%%take@%%$$@

/*
 * EnhancedMaskTest_OrEmpty_AndNotEmpty - recursive wildcard matcher.
 *
 * Tests whether mask[maskPos..] matches the whole remainder of the line
 * name[namePos..]. Letters are compared case-insensitively (ASCII only).
 *
 *   mask     pattern; its total length must already be in maskGLOBALlen
 *   maskPos  index of the next pattern character to consume
 *   name     line being tested; its total length (up to, not including, the
 *            terminating LF) must already be in nameGLOBALlen
 *   namePos  index of the next line character to consume
 *
 * Wildcards:
 *   '*'  any run of characters, possibly empty
 *   '@'  any single character, or nothing
 *   '#'  any single character (required)
 *   '^'  a single ASCII letter, or nothing
 *   '$'  a single ASCII letter (required)
 *   '|'  a single non-letter, or nothing
 *   '%'  a single non-letter (required)
 *
 * Returns true on a full match, false otherwise.
 *
 * The end of the line is checked before any character of the line is
 * consumed, including for literal pattern characters. Without that guard a
 * pattern containing an LF byte would consume the line terminator, the
 * remaining length would become negative and a following '*' would keep
 * recursing beyond the line.
 */
static boolean EnhancedMaskTest_OrEmpty_AndNotEmpty(const char *mask, int maskPos, 
                                                 const char *name, int namePos)
{
  // Lengths come from the globals instead of being recomputed at every level.
  int maskLen = maskGLOBALlen - maskPos;
  int nameLen = nameGLOBALlen - namePos;
  char maskChar;
  char c;
  int isAlpha;

  // Pattern exhausted: it is a match only if the line is exhausted too.
  if (maskLen == 0)
    return (nameLen == 0) ? true : false;

  maskChar = mask[maskPos];

  if (maskChar == '*')
  {
    // First let '*' match nothing, then let it swallow one more character.
    if (EnhancedMaskTest_OrEmpty_AndNotEmpty(mask, maskPos + 1, name, namePos))
      return true;
    if (nameLen <= 0)
      return false;
    return EnhancedMaskTest_OrEmpty_AndNotEmpty(mask, maskPos, name, namePos + 1);
  }

  // The "or empty" wildcards first try to match nothing at all.
  if (maskChar == '@' || maskChar == '|' || maskChar == '^')
  {
    if (EnhancedMaskTest_OrEmpty_AndNotEmpty(mask, maskPos + 1, name, namePos))
      return true;
  }

  // Everything below consumes exactly one character of the line.
  if (nameLen <= 0)
    return false;

  c = name[namePos];
  isAlpha = (KAZE_toupper(c) >= 'A') && (KAZE_toupper(c) <= 'Z');

  switch (maskChar)
  {
    case '@':
    case '#':
      // any character
      break;
    case '|':
    case '%':
      // non-letter only
      if (isAlpha)
        return false;
      break;
    case '^':
    case '$':
      // letter only
      if (!isAlpha)
        return false;
      break;
    default:
      // literal character, compared case-insensitively
      if (KAZE_toupper(maskChar) != KAZE_toupper(c))
        return false;
      break;
  }

  return EnhancedMaskTest_OrEmpty_AndNotEmpty(mask,  maskPos + 1, name, namePos + 1);
}


/*
 * CompareWildCardWithName - match one LF-terminated line against a pattern.
 *
 *   mask  NUL-terminated wildcard pattern
 *   name  start of the line; it must be terminated by an LF byte
 *
 * Stores both lengths in maskGLOBALlen / nameGLOBALlen for the recursive
 * matcher, counts the call in VIVA_IgorPavlov_invocations_global_counter and
 * counts a successful match in WildGLOBALhits.
 *
 * Returns true when the whole line matches the pattern, false otherwise.
 */
boolean CompareWildCardWithName(const char *mask, const char *name)
{
  boolean matched;

  ++VIVA_IgorPavlov_invocations_global_counter;

  // Measure both strings once, up front.
  maskGLOBALlen = KAZE_strlen(mask);
  nameGLOBALlen = KAZE_strlenLF(name);

  matched = EnhancedMaskTest_OrEmpty_AndNotEmpty(mask, 0, name, 0);
  if (matched)
  {
    ++WildGLOBALhits;
  }

  return matched;
}
// Above fragment(modified) is from wildcard.cpp from 7zip package.


/*
 * Blunderbuss - scan a text buffer line by line for a wildcard pattern.
 *
 *   y  buffer holding LF-terminated lines (CRLF works too: the CR is then
 *      part of the line, see the '@' note in the wildcard description)
 *   x  NUL-terminated wildcard pattern
 *   n  size of the search area in bytes, y[0] .. y[n-1]
 *   m  not used by this function
 *
 * Every line that matches the pattern as a whole is written, including its
 * LF, to the global stream fp_outLOG. Lines longer than 960 bytes are skipped
 * without being tested because the recursive matcher becomes extremely slow
 * on long lines. TotalLinesENC is incremented once per visited line.
 *
 * Returns the number of matching lines.
 *
 * NOTE(review): the search for the end of a line is not bounded by n, so the
 * buffer is expected to contain an LF at or after the last line - confirm
 * that the caller guarantees this.
 *
 * The start of each line is tracked directly. Recomputing it with a backward
 * scan and treating index 0 as "beginning of buffer" gave the wrong start for
 * the second line whenever the very first byte of the buffer was an LF.
 */
unsigned long Blunderbuss(char *y, char *x, long n, int m)
   {
    unsigned long hits = 0;
    long lineStart = 0;   // index of the first byte of the current line
    long lineEnd = 0;     // index of the LF that terminates the current line

    (void)m;

    if ( n <= 0 ) return(0);

    for (;;) {
        // Locate the terminating LF; this works for both LF and CRLF text.
        while (y[lineEnd] != 10) { ++lineEnd; }
        TotalLinesENC++;

        // Overlong lines are skipped, see the function comment.
        if (lineEnd - lineStart <= 960) {
            if ( CompareWildCardWithName ( x, &y[lineStart] ) ) {
                hits++;
                // Emit the matching line together with its LF.
                fwrite( &y[lineStart], lineEnd - lineStart + 1, 1, fp_outLOG );
            }
        }

        // Step over the LF: the next line starts right after it.
        lineStart = ++lineEnd;
        if ( lineEnd >= n - 1 ) return(hits);
    }
   }

// Kaze ]

// Compression parameters read by Compression() (and, for the threading flags,
// by Decompression()).

// Non-zero: split the input into segments with bsc_detect_segments() and use
// the detected segment sizes as block sizes.
int paramEnableSegmentation       = 0;
// Non-zero: detect a record size per block and reorder the block when the
// detected size is greater than 1.
int paramEnableReordering         = 0;
// Non-zero: add LIBBSC_FEATURE_FASTMODE to the features passed to bsc_compress().
int paramEnableFastMode           = 0;
// Zero: Compression() resets paramLZPHashSize and paramLZPMinLen to 0.
int paramEnableLZP                = 1;
// LZP hash size and minimum match length, passed straight to bsc_compress().
int paramLZPHashSize              = 16;
int paramLZPMinLen                = 128;
// Block sorter passed to bsc_compress().
int paramBlockSorter              = LIBBSC_BLOCKSORTER_BWT;
// Contexts order; LIBBSC_CONTEXTS_AUTODETECT requests per-block detection.
int paramSortingContexts          = LIBBSC_CONTEXTS_FOLLOWING;

// Non-zero: let OpenMP process several blocks at once (at most one thread per block).
int paramEnableParallelProcessing = 1;
// Non-zero: pass LIBBSC_FEATURE_MULTITHREADING to the libbsc calls.
int paramEnableMultiThreading     = 1;

#pragma pack(pop)

// File positioning with the widest offset type the toolchain offers:
// BSC_FSEEK / BSC_FTELL are the seek and tell functions and BSC_FILEOFFSET is
// the matching offset type. The last branch falls back to plain fseek/ftell
// with a long offset.
#if defined(__GNUC__) && (defined(_GLIBCXX_USE_LFS) || defined(__MINGW32__))
    #define BSC_FSEEK fseeko64
    #define BSC_FTELL ftello64
    #define BSC_FILEOFFSET off64_t
#elif defined(_MSC_VER) && _MSC_VER >= 1400
    #define BSC_FSEEK _fseeki64
    #define BSC_FTELL _ftelli64
    #define BSC_FILEOFFSET __int64
#else
    #define BSC_FSEEK fseek
    #define BSC_FTELL ftell
    #define BSC_FILEOFFSET long
#endif

// BSC_CLOCK() returns a time stamp in seconds as a double. Callers subtract
// two readings to obtain an elapsed time.
//   Windows branch : GetTickCount() milliseconds scaled to seconds.
//   Unix branch    : gettimeofday() seconds plus microseconds.
//   Fallback       : clock() / CLOCKS_PER_SEC, i.e. processor time rather
//                    than wall-clock time.
#if defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) || defined(_MSC_VER)
  #include <windows.h>
  double BSC_CLOCK() { return 0.001 * GetTickCount(); }
#elif defined (__unix) || defined (__linux__) || defined (__QNX__) || defined (_AIX)  || defined (__NetBSD__) || defined(macintosh) || defined (_MAC)
  #include <sys/time.h>
  double BSC_CLOCK() { timeval tv; gettimeofday(&tv, 0); return tv.tv_sec + tv.tv_usec * 0.000001; }
#else
  double BSC_CLOCK() { return (double)clock() / CLOCKS_PER_SEC; }
#endif

// Segment sizes produced by bsc_detect_segments() in Compression(). The array
// is shared by all threads and is only accessed inside the "input" critical
// section there.
int segmentedBlock[256];

/*
 * Compression - compress the file argv[2] block by block with libbsc.
 *
 *   argv[1]  mode: a first character of 'e' compresses into the file argv[3];
 *            'E' compresses without writing any output (only the progress and
 *            summary messages differ from other non-'e' modes)
 *   argv[2]  input file name
 *   argv[3]  output file name (used in mode 'e' only)
 *
 * Archive layout written in mode 'e': bscFileSign, the number of blocks as an
 * int, then for every block a BSC_BLOCK_HEADER followed by the compressed
 * bytes. With OpenMP the blocks are appended in the order in which the
 * threads finish them; blockOffset in the header records where each block
 * belongs in the original file.
 *
 * The function modifies the globals paramBlockSize (clamped to the file size)
 * and, when LZP is disabled, paramLZPHashSize / paramLZPMinLen.
 *
 * Any failure terminates the process: exit code 1 for I/O errors, 2 for
 * memory or internal errors.
 *
 * An empty input file is handled explicitly: the block size becomes 0 in that
 * case, so the block count is set to 0 instead of dividing by the block size,
 * and the OpenMP thread count is kept at 1 or more.
 */
void Compression(char * argv[])
{
    const int writeOutput = (*argv[1] == 'e');

    if (!paramEnableLZP)
    {
        paramLZPHashSize = 0;
        paramLZPMinLen = 0;
    }

    FILE * fInput = fopen(argv[2], "rb");
    if (fInput == NULL)
    {
        fprintf(stderr, "Can't open input file: %s!\n", argv[2]);
        exit(1);
    }

    // The output stream only exists in mode 'e'; it stays NULL otherwise.
    FILE * fOutput = NULL;
    if (writeOutput)
    {
        fOutput = fopen(argv[3], "wb");
        if (fOutput == NULL)
        {
            fprintf(stderr, "Can't create output file: %s!\n", argv[3]);
            exit(1);
        }
    } // Kaze

    // Determine the input size by seeking to the end and back.
    if (BSC_FSEEK(fInput, 0, SEEK_END))
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    BSC_FILEOFFSET fileSize = BSC_FTELL(fInput);
    if (fileSize < 0)
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    if (BSC_FSEEK(fInput, 0, SEEK_SET))
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    // A file smaller than one block is compressed as a single block.
    if (paramBlockSize > fileSize)
    {
        paramBlockSize = (int)fileSize;
    }

    if (writeOutput)
    {
        if (fwrite(bscFileSign, sizeof(bscFileSign), 1, fOutput) != 1)
        {
            fprintf(stderr, "IO error on file: %s!\n", argv[3]);
            exit(1);
        }
    } // Kaze

    // paramBlockSize is 0 for an empty input: there are no blocks then, and
    // the division must not be performed.
    int nBlocks = paramBlockSize > 0 ? (int)((fileSize + paramBlockSize - 1) / paramBlockSize) : 0;
    if (writeOutput)
    {
        if (fwrite(&nBlocks, sizeof(nBlocks), 1, fOutput) != 1)
        {
            fprintf(stderr, "IO error on file: %s!\n", argv[3]);
            exit(1);
        }
    } // Kaze
    double startTime = BSC_CLOCK();

#ifdef _OPENMP

    int numThreads = 1;
    if (paramEnableParallelProcessing)
    {
        // Never start more threads than there are blocks, but at least one.
        numThreads = omp_get_max_threads();
        if (numThreads > nBlocks)
        {
            numThreads = nBlocks;
        }
        if (numThreads < 1)
        {
            numThreads = 1;
        }
    }

#endif

    // Window [segmentationStart, segmentationEnd) of not yet consumed entries
    // in segmentedBlock; shared by all threads, guarded by the "input" section.
    int segmentationStart = 0, segmentationEnd = 0;

#ifdef _OPENMP
    #pragma omp parallel default(shared) num_threads(numThreads)
#endif
    {
        // Per-thread work buffer: one block plus room for the libbsc header.
        unsigned char * buffer = (unsigned char *)malloc(paramBlockSize + LIBBSC_HEADER_SIZE);
        if (buffer == NULL)
        {
#ifdef _OPENMP
            #pragma omp critical(print)
#endif
            {

                fprintf(stderr, "Not enough memory!\n");
                exit(2);
            }
        }

        while (true)
        {
            BSC_FILEOFFSET  blockOffset     = 0;
            int             dataSize        = 0;

            // --- Read the next block; the input stream is shared. ---
#ifdef _OPENMP
            #pragma omp critical(input)
#endif
            {
                if ((feof(fInput) == 0) && (BSC_FTELL(fInput) != fileSize))
                {
#ifdef _OPENMP
                    #pragma omp master
#endif
                    {
                        double progress = (100.0 * (double)BSC_FTELL(fInput)) / fileSize;
                        if (*argv[1] == 'e') {
                        fprintf(stdout, "\rCompressing %.55s(%02d%%)", argv[2], (int)progress);
                        fflush(stdout);
                        } // Kaze
                        if (*argv[1] == 'E') {
                        fprintf(stdout, "\rCompressing(without writing) %.55s(%02d%%)", argv[2], (int)progress);
                        fflush(stdout);
                        } // Kaze
                    }

                    blockOffset = BSC_FTELL(fInput);

                    int currentBlockSize = paramBlockSize;
                    if (paramEnableSegmentation)
                    {
                        if (segmentationEnd - segmentationStart > 1) currentBlockSize = segmentedBlock[segmentationStart];
                    }

                    dataSize = (int)fread(buffer, 1, currentBlockSize, fInput);
                    if (dataSize <= 0)
                    {
                        fprintf(stderr, "\nIO error on file: %s!\n", argv[2]);
                        exit(1);
                    }

                    if (paramEnableSegmentation)
                    {
                        bool bSegmentation = false;

                        // Detect segments again when none are left, or when the
                        // last remaining one does not match what was just read.
                        if (segmentationStart == segmentationEnd) bSegmentation = true;
                        if ((segmentationEnd - segmentationStart == 1) && (dataSize != segmentedBlock[segmentationStart])) bSegmentation = true;

                        if (bSegmentation)
                        {
                            segmentationStart = 0; segmentationEnd = bsc_detect_segments(buffer, dataSize, segmentedBlock, 256, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
                            if (segmentationEnd <= LIBBSC_NO_ERROR)
                            {
                                switch (segmentationEnd)
                                {
                                    case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                                    default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                                }
                                exit(2);
                            }
                        }

                        // Keep only the first segment and rewind the stream so
                        // that the next read starts right after it.
                        int newDataSize = segmentedBlock[segmentationStart++];
                        if (dataSize != newDataSize)
                        {
                            BSC_FILEOFFSET pos = BSC_FTELL(fInput) - dataSize + newDataSize;
                            BSC_FSEEK(fInput, pos, SEEK_SET);
                            dataSize = newDataSize;
                        }
                    }
                }
            }

            // Nothing was read: the whole input has been handed out.
            if (dataSize == 0) break;

            // --- Optional record reordering. ---
            char recordSize = 1;
            if (paramEnableReordering)
            {
                recordSize = bsc_detect_recordsize(buffer, dataSize, LIBBSC_FEATURE_FASTMODE);
                if (recordSize < LIBBSC_NO_ERROR)
                {
#ifdef _OPENMP
                    #pragma omp critical(print)
#endif
                    {
                        switch (recordSize)
                        {
                            case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                            default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                        }
                        exit(2);
                    }
                }
                if (recordSize > 1)
                {
                    int result = bsc_reorder_forward(buffer, dataSize, recordSize, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
                    if (result != LIBBSC_NO_ERROR)
                    {
#ifdef _OPENMP
                        #pragma omp critical(print)
#endif
                        {
                            switch (result)
                            {
                                case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                                default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                            }
                            exit(2);
                        }
                    }
                }
            }

            // --- Contexts order: fixed or detected per block. ---
            char sortingContexts = paramSortingContexts;
            if (paramSortingContexts == LIBBSC_CONTEXTS_AUTODETECT)
            {
                sortingContexts = bsc_detect_contextsorder(buffer, dataSize, LIBBSC_FEATURE_FASTMODE);
                if (sortingContexts < LIBBSC_NO_ERROR)
                {
#ifdef _OPENMP
                    #pragma omp critical(print)
#endif
                    {
                        switch (sortingContexts)
                        {
                            case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                            default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                        }
                        exit(2);
                    }
                }
            }
            if (sortingContexts == LIBBSC_CONTEXTS_PRECEDING)
            {
                int result = bsc_reverse_block(buffer, dataSize, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
                if (result != LIBBSC_NO_ERROR)
                {
#ifdef _OPENMP
                    #pragma omp critical(print)
#endif
                    {
                        fprintf(stderr, "\nInternal program error, please contact the author!\n");
                        exit(2);
                    }
                }
            }

            int features =
                (paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE) |
                (paramEnableFastMode ? LIBBSC_FEATURE_FASTMODE : LIBBSC_FEATURE_NONE)
            ;

            // --- Compress in place. ---
            int blockSize = bsc_compress(buffer, buffer, dataSize, paramLZPHashSize, paramLZPMinLen, paramBlockSorter, features);
            if (blockSize == LIBBSC_NOT_COMPRESSIBLE)
            {
                // The buffer no longer holds the original bytes (it may have
                // been reordered or reversed), so read the block again from
                // the file and store it uncompressed.
#ifdef _OPENMP
                #pragma omp critical(input)
#endif
                {
                    sortingContexts = LIBBSC_CONTEXTS_FOLLOWING; recordSize = 1;

                    BSC_FILEOFFSET pos = BSC_FTELL(fInput);
                    {
                        BSC_FSEEK(fInput, blockOffset, SEEK_SET);
                        if (dataSize != (int)fread(buffer, 1, dataSize, fInput))
                        {
                            fprintf(stderr, "\nInternal program error, please contact the author!\n");
                            exit(2);
                        }
                    }
                    // Restore the read position for the other threads.
                    BSC_FSEEK(fInput, pos, SEEK_SET);
                }

                blockSize = bsc_store(buffer, buffer, dataSize);
            }
            if (blockSize < LIBBSC_NO_ERROR)
            {
#ifdef _OPENMP
                #pragma omp critical(print)
#endif
                {
                    switch (blockSize)
                    {
                        case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                        default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                    }
                    exit(2);
                }
            }

            // --- Append header and compressed block; the output is shared. ---
#ifdef _OPENMP
            #pragma omp critical(output)
#endif
            {
                BSC_BLOCK_HEADER header = {blockOffset, recordSize, sortingContexts};
                if (writeOutput)
                {
                    if (fwrite(&header, sizeof(BSC_BLOCK_HEADER), 1, fOutput) != 1)
                    {
                        fprintf(stderr, "\nIO error on file: %s!\n", argv[3]);
                        exit(1);
                    }

                    if ((int)fwrite(buffer, 1, blockSize, fOutput) != blockSize)
                    {
                        fprintf(stderr, "\nIO error on file: %s!\n", argv[3]);
                        exit(1);
                    }
                } // Kaze
            }

        }

        free(buffer);
    }

    if (writeOutput)
    {
        // Take the final size while the stream is still open, then close it
        // and report buffered write errors before announcing success.
        double outputSize = (double)BSC_FTELL(fOutput);
        double elapsed = BSC_CLOCK() - startTime;
        if (fclose(fOutput) != 0)
        {
            fprintf(stderr, "\nIO error on file: %s!\n", argv[3]);
            exit(1);
        }
        fprintf(stdout, "\r%.55s compressed %.0f into %.0f in %.3f seconds.\n", argv[2], (double)fileSize, outputSize, elapsed);
    } // Kaze
    if (*argv[1] == 'E') {
    fprintf(stdout, "\r%.55s compressed %.0f in %.3f seconds.\n", argv[2], (double)fileSize, BSC_CLOCK() - startTime);
    } // Kaze

    fclose(fInput);
}

void Decompression(char * argv[])
{
    FILE * fInput = fopen(argv[2], "rb");
    if (fInput == NULL)
    {
        fprintf(stderr, "Can't open input file: %s!\n", argv[2]);
        exit(1);
    }

    FILE * fOutput;
    if (*argv[1] == 'd') {
    fOutput = fopen(argv[3], "wb");
    if (fOutput == NULL)
    {
        fprintf(stderr, "Can't create output file: %s!\n", argv[3]);
        exit(1);
    }
    } // Kaze

    if (BSC_FSEEK(fInput, 0, SEEK_END))
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    BSC_FILEOFFSET fileSize = BSC_FTELL(fInput);
    if (fileSize < 0)
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    if (BSC_FSEEK(fInput, 0, SEEK_SET))
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[2]);
        exit(1);
    }

    unsigned char inputFileSign[sizeof(bscFileSign)];

    if (fread(inputFileSign, sizeof(bscFileSign), 1, fInput) != 1)
    {
        fprintf(stderr, "This is not bsc archive!\n");
        exit(1);
    }

    if (memcmp(inputFileSign, bscFileSign, sizeof(bscFileSign)) != 0)
    {
        fprintf(stderr, "This is not bsc archive or invalid compression method!\n");
        exit(2);
    }

    int nBlocks = 0;
    if (fread(&nBlocks, sizeof(nBlocks), 1, fInput) != 1)
    {
        fprintf(stderr, "This is not bsc archive!\n");
        exit(1);
    }

    double startTime = BSC_CLOCK();

#ifdef _OPENMP

    int numThreads = 1;
    if (paramEnableParallelProcessing)
    {
        numThreads = omp_get_max_threads();
        if (numThreads > nBlocks)
        {
            numThreads = nBlocks;
        }
    }

    #pragma omp parallel default(shared) num_threads(numThreads)
#endif
    {
        int bufferSize = 1024;
        unsigned char * buffer = (unsigned char *)malloc(bufferSize);
        if (buffer == NULL)
        {
#ifdef _OPENMP
            #pragma omp critical(print)
#endif
            {
                fprintf(stderr, "Not enough memory!\n");
                exit(2);
            }
        }

        while (true)
        {
            BSC_FILEOFFSET  blockOffset     = 0;

            char            sortingContexts = 0;
            char            recordSize      = 0;
            int             blockSize       = 0;
            int             dataSize        = 0;

#ifdef _OPENMP
            #pragma omp critical(input)
#endif
            {
                if ((feof(fInput) == 0) && (BSC_FTELL(fInput) != fileSize))
                {
#ifdef _OPENMP
                    #pragma omp master
#endif
                    {
                        double progress = (100.0 * (double)BSC_FTELL(fInput)) / fileSize;
                        if (*argv[1] == 'd') {
                        fprintf(stdout, "\rDecompressing %.55s(%02d%%)", argv[2], (int)progress);
                        fflush(stdout);
                        } // Kaze
                        if (*argv[1] == 'D') {
                        fprintf(stdout, "\rDecompressing(without writing) %.55s(%02d%%)", argv[2], (int)progress);
                        fflush(stdout);
                        } // Kaze
                    }

                    BSC_BLOCK_HEADER header = {0};
                    if (fread(&header, sizeof(BSC_BLOCK_HEADER), 1, fInput) != 1)
                    {
                        fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]);
                        exit(1);
                    }

                    recordSize = header.recordSize;
                    if (recordSize < 1)
                    {
                        fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n");
                        exit(2);
                    }

                    sortingContexts = header.sortingContexts;
                    if ((sortingContexts != LIBBSC_CONTEXTS_FOLLOWING) && (sortingContexts != LIBBSC_CONTEXTS_PRECEDING))
                    {
                        fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n");
                        exit(2);
                    }

                    blockOffset = (BSC_FILEOFFSET)header.blockOffset;

                    unsigned char bscBlockHeader[LIBBSC_HEADER_SIZE];

                    if (fread(bscBlockHeader, LIBBSC_HEADER_SIZE, 1, fInput) != 1)
                    {
                        fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]);
                        exit(1);
                    }

                    if (bsc_block_info(bscBlockHeader, LIBBSC_HEADER_SIZE, &blockSize, &dataSize) != LIBBSC_NO_ERROR)
                    {
                        fprintf(stderr, "\nThis is not bsc archive or invalid compression method!\n");
                        exit(2);
                    }

                    if (blockSize > bufferSize)
                    {
                        free(buffer); buffer = (unsigned char *)malloc(blockSize);
                        bufferSize = blockSize;
                    }

                    if (dataSize > bufferSize)
                    {
                        free(buffer); buffer = (unsigned char *)malloc(dataSize);
                        bufferSize = dataSize;
                    }

                    if (buffer == NULL)
                    {
                        fprintf(stderr, "\nNot enough memory!\n");
                        exit(2);
                    }

                    memcpy(buffer, bscBlockHeader, LIBBSC_HEADER_SIZE);

                    if (fread(buffer + LIBBSC_HEADER_SIZE, blockSize - LIBBSC_HEADER_SIZE, 1, fInput) != 1)
                    {
                        fprintf(stderr, "\nUnexpected end of file: %s!\n", argv[2]);
                        exit(1);
                    }
                }
            }

            if (dataSize == 0) break;

            int result = bsc_decompress(buffer, blockSize, buffer, dataSize, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
            if (result < LIBBSC_NO_ERROR)
            {
#ifdef _OPENMP
                #pragma omp critical(print)
#endif
                {
                    switch (result)
                    {
                        case LIBBSC_DATA_CORRUPT        : fprintf(stderr, "\nThe compressed data is corrupted!\n"); break;
                        case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                        default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                    }
                    exit(2);
                }
            }

            if (sortingContexts == LIBBSC_CONTEXTS_PRECEDING)
            {
                result = bsc_reverse_block(buffer, dataSize, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
                if (result != LIBBSC_NO_ERROR)
                {
#ifdef _OPENMP
                    #pragma omp critical(print)
#endif
                    {
                        fprintf(stderr, "\nInternal program error, please contact the author!\n");
                        exit(2);
                    }
                }
            }

            if (recordSize > 1)
            {
                result = bsc_reorder_reverse(buffer, dataSize, recordSize, paramEnableMultiThreading ? LIBBSC_FEATURE_MULTITHREADING : LIBBSC_FEATURE_NONE);
                if (result != LIBBSC_NO_ERROR)
                {
#ifdef _OPENMP
                    #pragma omp critical(print)
#endif
                    {
                        switch (result)
                        {
                            case LIBBSC_NOT_ENOUGH_MEMORY   : fprintf(stderr, "\nNot enough memory!\n"); break;
                            default                         : fprintf(stderr, "\nInternal program error, please contact the author!\n");
                        }
                        exit(2);
                    }
                }
            }

#ifdef _OPENMP
            #pragma omp critical(output)
#endif
            {
    switch (*argv[1]) // Kaze [
    {
        case 'd' :
                if (BSC_FSEEK(fOutput, blockOffset, SEEK_SET))
                {
                    fprintf(stderr, "\nIO error on file: %s!\n", argv[3]);
                    exit(1);
                }
                if ((int)fwrite(buffer, 1, dataSize, fOutput) != dataSize)
                {
                    fprintf(stderr, "\nIO error on file: %s!\n", argv[3]);
                    exit(1);
                }
        break;
        case 'D' :
        break;
        default  :
        // GRAFFITH [
                        //fprintf(stdout, "GRAFFITH: Merging chunk of size %d bytes at %d offset...\n", (int)dataSize, (int)blockOffset);
  // ~~~~~~~~~~~~~~~Copy chunks to my Buffer [
        GRFFTH_DE_buffer_SIZE = GRFFTH_DE_buffer_SIZE + dataSize;
        if (blockOffset + dataSize <= GRFFTH_DE_buffer_MAXSIZE)
        memcpy (GRFFTH_DE_buffer_POINTER + blockOffset, buffer, dataSize);
        else
        { printf( "GRAFFITH: Decoded file exceeds %lu bytes.\n", GRFFTH_DE_buffer_MAXSIZE ); exit( 1 ); }
        //src = (char *)outBuf;
        //count = outPos;
        //while (count--) {
        //        *(char *)dst = *(char *)src;
        //        dst = (char *)dst + 1;
        //        src = (char *)src + 1;
        //}
  // ~~~~~~~~~~~~~~~Copy chunks to my Buffer ]
        // GRAFFITH ]
    } // Kaze ]
            }
        }

        free(buffer);
    }

    if (*argv[1] == 'd') {
    if (BSC_FSEEK(fOutput, 0, SEEK_END))
    {
        fprintf(stderr, "IO error on file: %s!\n", argv[3]);
        exit(1);
    }
    } // Kaze

    if (*argv[1] == 'd') {
    fprintf(stdout, "\r%.55s decompressed %.0f into %.0f in %.3f seconds.\n", argv[2], (double)fileSize, (double)BSC_FTELL(fOutput), BSC_CLOCK() - startTime);
    } // Kaze
    if (*argv[1] == 'D') {
    fprintf(stdout, "\r%.55s decompressed %.0f in %.3f seconds.\n", argv[2], (double)fileSize, BSC_CLOCK() - startTime);
    } // Kaze

    if (*argv[1] != 'D' && *argv[1] != 'd') {
    TotalDecompressTime = TotalDecompressTime + (BSC_CLOCK() - startTime);
if (TotalDecompressTime == 0) TotalDecompressTime++;
    TotalDecompressSize = TotalDecompressSize + GRFFTH_DE_buffer_SIZE;
    fprintf(stdout, "\r~ Overall decompression performance: %.2f MB/s so far\n", (TotalDecompressSize/1024/1024)/TotalDecompressTime);
    } // Kaze

    fclose(fInput);
    if (*argv[1] == 'd') fclose(fOutput);
}

// Prints the complete command-line help to stdout and terminates the process
// with exit status 0. The text is emitted in a few grouped fprintf calls;
// adjacent string literals are concatenated by the compiler, so the output is
// one continuous help screen.
void ShowUsage(void)
{
    // Synopsis plus the GRAFFITH-specific modes; the '#' line reports the
    // decode buffer limit in megabytes.
    fprintf(stdout,
        "Usage: GRAFFITH <e|E|d|D|_> inputfile outputfile <switches>\n\n"
        "Options:\n"
        "   E encodes without writing i.e. test-encode\n"
        "   D decodes without writing i.e. test-decode\n"
        "   # decodes without writing plus searching, MaxSizeOfDecodedBuffer = %luMB\n",
        (GRFFTH_DE_buffer_MAXSIZE>>20) );

    // Switches understood by ProcessSwitch.
    fprintf(stdout,
        "Switches:\n"
        "  -b<size> Block size in megabytes, default: -b3\n"
        "             minimum: -b1, maximum: -b1024\n"
        "  -m<algo> Block sorting algorithm, default: -m3\n"
        "             -m0 Sort Transform of order 3\n"
        "             -m1 Sort Transform of order 4\n"
        "             -m2 Sort Transform of order 5\n"
        "             -m3 Burrows Wheeler Transform\n"
        "  -c<ctx>  Contexts for sorting, default: -cf\n"
        "             -cf Following contexts\n"
        "             -cp Preceding contexts\n"
        "             -ca Autodetect (experimental)\n"
        "  -H<size> LZP hash table size in bits, default: -H16\n"
        "             minimum: -H10, maximum: -H28\n"
        "  -M<size> LZP minimum match length, default: -M128\n"
        "             minimum: -M4, maximum: -M255\n"
        "  -f       Enable fast compression mode, default: disable\n"
        "  -l       Enable LZP, default: enable\n"
        "  -r       Enable Reordering, default: disable\n"
        "  -s       Enable Segmentation, default: disable\n"
        "  -p       Disable all preprocessing techniques\n");

#ifdef _OPENMP
    // The threading switches are only listed in OpenMP builds.
    fprintf(stdout,
        "  -t       Disable parallel blocks processing, default: enable\n"
        "  -T       Disable multi-core systems support, default: enable\n");
#endif

    fprintf(stdout,
        "\nSwitches may be combined into one, like -b128p\n\n");

    // Notes about the search mode and its wildcards.
    fprintf(stdout,
        "Note1: GRAFFITH.ini: each line represents a pattern\n"
        "       GRAFFITH.lst: each line represents a BSC file\n"
        "       GRAFFITH.log: each line is a Blunderbuss(wildcard function) hit\n"
        "Note2: If pattern contains at least one wildcard then Blunderbuss is evoked for\n"
        "       each line in the file, otherwise Railgun is evoked for the entire file.\n"
        "Note3: Blunderbuss dumps resultant hits, Railgun dumps hits too but\n"
        "       the pattern must be equal to the whole(not partial match) line.\n"
        "Note4: Seven wildcards are available:\n"
        "       wildcard '*' any character(s) or empty,\n"
        "       wildcard '@'/'#' any character {or empty}/{and not empty},\n"
        "       wildcard '^'/'$' any ALPHA character {or empty}/{and not empty},\n"
        "       wildcard '|'/'%%' any NON-ALPHA character {or empty}/{and not empty}.\n"
        "Note5: Due to different line endings(CRLF in Windows; LF in UNIX)\n"
        "       you must add a '@' wildcard in place of CR:\n"
        "       for example in case of searching for '*.pdf' write '*.pdf@'.\n"
        "Note6: A pseudo bug exists uncrushed - End-Of-File must be LF character.\n"
        "Note7: To achieve fastest TEXT decompression consider one of: -m0fb3 -m3fb3.\n\n");

    // Author's measurements behind Note7:
    // GRAFFITH.exe e OSHO.TXT OSHO.TXT.bsc -m0fb3  - decompression: 13s(2 cores) 45MB
    // GRAFFITH.exe e OSHO.TXT OSHO.TXT.bsc -m3fb3  - decompression: 18s(2 cores) 36MB
    // GRAFFITH.exe e OSHO.TXT OSHO.TXT.bsc -m3fb12 - decompression: 20s(2 cores) 34MB

    fprintf(stdout,
        "Example1: GRAFFITH e Caterpillar.47.RAFT6.txt Caterpillar.47.RAFT6.txt.bsc -m3f\n"
        "Example2: GRAFFITH d Caterpillar.47.RAFT6.txt.bsc Caterpillar.47.RAFT6.txt\n"
        "Example3: GRAFFITH #\n");

    exit(0);
}

// Reads the unsigned decimal number that starts at *cursor and moves *cursor
// past its digits. Returns -1 (cursor untouched) when no digit is present.
// A value too large for 'long' comes back as LONG_MAX from strtol, which every
// caller rejects through its range check.
static long ParseSwitchNumber(char ** cursor)
{
    char * first = *cursor;
    char * end   = first;
    long   value;

    // Only a plain digit may start the number: strtol on its own would also
    // accept leading blanks and a sign.
    if ((*first < '0') || (*first > '9')) return -1;

    value   = strtol(first, &end, 10);
    *cursor = end;
    return value;
}

// Parses one command-line switch group (the text after the leading '-') and
// stores the selected options in the file-level param* settings. Several
// switches may be combined, e.g. "b128p".
//
// Any malformed switch - empty group, unknown letter, missing or out-of-range
// number - calls ShowUsage().
//
// Numeric arguments are range-checked BEFORE they are used: the block size is
// validated as a megabyte count first, so the conversion to bytes can no
// longer overflow 'int' and wrap back into the accepted range.
void ProcessSwitch(char * s)
{
    if (*s == 0)
    {
        ShowUsage();
    }

    while (*s != 0)
    {
        switch (*s++)
        {
            case 'b':
                {
                    long megabytes = ParseSwitchNumber(&s);
                    if ((megabytes < 1) || (megabytes > 1024))
                    {
                        ShowUsage();
                    }
                    else
                    {
                        paramBlockSize = (int)megabytes * 1024 * 1024;
                    }
                    break;
                }

            case 'm':
                {
                    // A missing number yields -1 and lands in 'default'
                    // instead of silently selecting -m0.
                    switch (ParseSwitchNumber(&s))
                    {
                        case 0   : paramBlockSorter = LIBBSC_BLOCKSORTER_ST3; break;
                        case 1   : paramBlockSorter = LIBBSC_BLOCKSORTER_ST4; break;
                        case 2   : paramBlockSorter = LIBBSC_BLOCKSORTER_ST5; break;
                        case 3   : paramBlockSorter = LIBBSC_BLOCKSORTER_BWT; break;
                        default  : ShowUsage();
                    }
                    break;
                }

            case 'c':
                {
                    switch (*s++)
                    {
                        case 'f' : paramSortingContexts = LIBBSC_CONTEXTS_FOLLOWING; break;
                        case 'p' : paramSortingContexts = LIBBSC_CONTEXTS_PRECEDING; break;
                        case 'a' : paramSortingContexts = LIBBSC_CONTEXTS_AUTODETECT; break;
                        default  : ShowUsage();
                    }
                    break;
                }

            case 'H':
                {
                    long bits = ParseSwitchNumber(&s);
                    if ((bits < 10) || (bits > 28))
                    {
                        ShowUsage();
                    }
                    else
                    {
                        paramLZPHashSize = (int)bits;
                    }
                    break;
                }

            case 'M':
                {
                    long minLen = ParseSwitchNumber(&s);
                    if ((minLen < 4) || (minLen > 255))
                    {
                        ShowUsage();
                    }
                    else
                    {
                        paramLZPMinLen = (int)minLen;
                    }
                    break;
                }

#ifdef _OPENMP
            case 't': paramEnableParallelProcessing = 0; break;
            case 'T': paramEnableParallelProcessing = paramEnableMultiThreading = 0; break;
#endif

            case 'f': paramEnableFastMode       = 1; break;
            case 'l': paramEnableLZP            = 1; break;
            case 's': paramEnableSegmentation   = 1; break;
            case 'r': paramEnableReordering     = 1; break;

            case 'p': paramEnableLZP = paramEnableSegmentation = paramEnableReordering = 0; break;

            default : ShowUsage();
        }
    }
}

// Validates the overall shape of the command line and hands every trailing
// argument to ProcessSwitch.
//
// Expected layout: argv[1] is a one-character mode, argv[2] and argv[3] are the
// two file names, and everything from argv[4] on must start with '-'.
// Anything else ends in ShowUsage().
void ProcessCommandline(int argc, char * argv[])
{
    // The argc test comes first, so argv[1] is only inspected when it exists.
    if (!(argc >= 4 && strlen(argv[1]) == 1))
    {
        ShowUsage();
    }

    int index = 4;
    while (index < argc)
    {
        char * argument = argv[index++];

        if (argument[0] != '-')
        {
            ShowUsage();
        }
        else
        {
            // Skip the dash; the rest is the switch group.
            ProcessSwitch(argument + 1);
        }
    }
}

// Kaze [

// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Returns a pointer to the first occurrence of the cbPattern bytes at pbPattern
// inside the cbTarget bytes at pbTarget, or NULL when there is none.
//
//   pbTarget / cbTarget   - text to search and its length in bytes
//   pbPattern / cbPattern - bytes to look for and their count
//
// An empty pattern matches at pbTarget (memmem convention); a pattern longer
// than the target never matches.
//
// Targets shorter than 961 bytes are scanned position by position, testing the
// first and last pattern byte before the bytes in between (the role of the
// original two-byte "hash"). Longer targets use Boyer-Moore-Horspool.
//
// All bytes are handled as unsigned char: with a plain (possibly signed) char,
// bytes >= 0x80 indexed the shift table with a negative value and never
// compared equal to the unsigned last-byte probe.
char * Railgun (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    enum { RAILGUN_BYTE_VALUES = 256 }; // one shift-table slot per unsigned char value

    const unsigned char * text    = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned long lastIndex;   // index of the last pattern byte
    unsigned long lastStart;   // last text position where the pattern still fits
    unsigned long middleLen;   // bytes strictly between the first and last pattern byte
    unsigned long pos;

    if (cbPattern > cbTarget)
        return(NULL);
    if (cbPattern == 0)
        return(pbTarget);

    lastIndex = cbPattern - 1;
    lastStart = cbTarget - cbPattern;
    middleLen = (cbPattern > 2) ? cbPattern - 2 : 0;

    if (cbTarget < 961) // Threshold kept from the original: below it the plain scan is expected to beat Horspool.
    {
        for (pos = 0; pos <= lastStart; pos++)
        {
            if (text[pos] == pattern[0]
             && text[pos + lastIndex] == pattern[lastIndex]
             && memcmp(text + pos + 1, pattern + 1, middleLen) == 0)
                return(pbTarget + pos);
        }
        return(NULL);
    }
    else
    {
        unsigned long shift[RAILGUN_BYTE_VALUES];
        unsigned long k;

        /* Preprocessing: bad-character shifts (the last pattern byte is excluded) */
        for (k = 0; k < (unsigned long)RAILGUN_BYTE_VALUES; k++) shift[k] = cbPattern;
        for (k = 0; k < lastIndex; k++) shift[pattern[k]] = lastIndex - k;

        /* Searching */
        pos = 0;
        while (pos <= lastStart)
        {
            const unsigned char tail = text[pos + lastIndex];

            // Cheap probes first (borrowed from Karp-Rabin): the full compare
            // runs only when the window "looks like" the pattern.
            if (tail == pattern[lastIndex]
             && text[pos] == pattern[0]
             && memcmp(text + pos + 1, pattern + 1, middleLen) == 0)
                return(pbTarget + pos);

            pos += shift[tail]; // at most cbPattern, so pos never exceeds cbTarget
        }
        return(NULL);
    }
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]


// ### Railgun_totalhits [
// Counting variant of Railgun: adds one to the file-level counter Railgunhits
// for every occurrence (overlapping ones included) of the cbPattern bytes at
// pbPattern inside the cbTarget bytes at pbTarget. The counter is not reset
// here. The return value is always NULL.
//
// An empty pattern, or one longer than the target, counts nothing.
//
// Targets shorter than 961 bytes are scanned position by position (first and
// last byte probed before the middle); longer targets use
// Boyer-Moore-Horspool.
//
// All bytes are handled as unsigned char: with a plain (possibly signed) char,
// bytes >= 0x80 indexed the shift table with a negative value and never
// compared equal to the unsigned last-byte probe.
char * Railgun_totalhits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    enum { RAILGUN_BYTE_VALUES = 256 }; // one shift-table slot per unsigned char value

    const unsigned char * text    = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned long lastIndex;   // index of the last pattern byte
    unsigned long lastStart;   // last text position where the pattern still fits
    unsigned long middleLen;   // bytes strictly between the first and last pattern byte
    unsigned long pos;

    if (cbPattern > cbTarget || cbPattern == 0)
        return(NULL);

    lastIndex = cbPattern - 1;
    lastStart = cbTarget - cbPattern;
    middleLen = (cbPattern > 2) ? cbPattern - 2 : 0;

    if (cbTarget < 961) // Threshold kept from the original: below it the plain scan is expected to beat Horspool.
    {
        for (pos = 0; pos <= lastStart; pos++)
        {
            if (text[pos] == pattern[0]
             && text[pos + lastIndex] == pattern[lastIndex]
             && memcmp(text + pos + 1, pattern + 1, middleLen) == 0)
                Railgunhits++;
        }
        return(NULL);
    }
    else
    {
        unsigned long shift[RAILGUN_BYTE_VALUES];
        unsigned long k;

        /* Preprocessing: bad-character shifts (the last pattern byte is excluded) */
        for (k = 0; k < (unsigned long)RAILGUN_BYTE_VALUES; k++) shift[k] = cbPattern;
        for (k = 0; k < lastIndex; k++) shift[pattern[k]] = lastIndex - k;

        /* Searching */
        pos = 0;
        while (pos <= lastStart)
        {
            const unsigned char tail = text[pos + lastIndex];

            // Cheap probes first; the full compare runs only when the window
            // "looks like" the pattern.
            if (tail == pattern[lastIndex]
             && text[pos] == pattern[0]
             && memcmp(text + pos + 1, pattern + 1, middleLen) == 0)
                Railgunhits++;

            pos += shift[tail]; // the Horspool shift is applied after a hit as well
        }
        return(NULL);
    }
}
// ### Railgun_totalhits ]


// ### Railgun_Quadruplet_totalhits [
// Counting search with three strategies. Adds one to the file-level counter
// Railgunhits for every occurrence (overlapping ones included) of the cbPattern
// bytes at pbPattern inside the cbTarget bytes at pbTarget. The counter is not
// reset here. The return value is always NULL.
//
// An empty pattern, or one longer than the target, counts nothing.
//
//   cbPattern < 4                  : position-by-position scan, first and last
//                                    byte probed before the middle;
//   cbPattern >= 4, cbTarget < 961 : "quadruplet" scan - the first four bytes of
//                                    the window are compared at once, and the
//                                    window then hops over positions that
//                                    cannot start a match;
//   otherwise                      : Boyer-Moore-Horspool.
//
// The quadruplet branch compares bytes instead of loading an 'unsigned long'
// through a cast pointer. That load read sizeof(unsigned long) bytes (8 where
// long is 64-bit, overrunning short patterns and the end of the target), was
// unaligned, and depended on little-endian byte order.
//
// All bytes are handled as unsigned char: with a plain (possibly signed) char,
// bytes >= 0x80 indexed the shift table with a negative value and never
// compared equal to the unsigned last-byte probe.
char * Railgun_Quadruplet_totalhits (char * pbTarget,
     char * pbPattern,
     unsigned long cbTarget,
     unsigned long cbPattern)
{
    enum { RAILGUN_BYTE_VALUES = 256 }; // one shift-table slot per unsigned char value

    const unsigned char * text    = (const unsigned char *)pbTarget;
    const unsigned char * pattern = (const unsigned char *)pbPattern;
    unsigned long lastIndex;   // index of the last pattern byte
    unsigned long lastStart;   // last text position where the pattern still fits
    unsigned long pos;

    if (cbPattern > cbTarget || cbPattern == 0)
        return(NULL);

    lastIndex = cbPattern - 1;
    lastStart = cbTarget - cbPattern;

    if (cbPattern < 4)
    {
        // Short patterns (1..3 bytes): plain scan, whatever the target size.
        const unsigned long middleLen = (cbPattern > 2) ? cbPattern - 2 : 0;

        for (pos = 0; pos <= lastStart; pos++)
        {
            if (text[pos] == pattern[0]
             && text[pos + lastIndex] == pattern[lastIndex]
             && memcmp(text + pos + 1, pattern + 1, middleLen) == 0)
                Railgunhits++;
        }
        return(NULL);
    }

    if (cbTarget < 961) // Threshold kept from the original: below it this scan is expected to beat Horspool.
    {
        const unsigned char first = pattern[0];

        pos = 0;
        while (pos <= lastStart)
        {
            const unsigned char * window = text + pos;
            // Extra positions after 'pos' that are known not to hold 'first'
            // and therefore cannot start a match.
            unsigned long hop = 0;

            if (memcmp(window, pattern, 4) == 0)
            {
                // Leading quadruplet agrees: verify the rest, and while the
                // bytes keep matching extend 'hop' over the unbroken run of
                // bytes (from offset 1) that differ from the first pattern byte.
                unsigned long k = 1;
                while (k < cbPattern && pattern[k] == window[k])
                {
                    if (hop + 1 == k && window[k] != first) hop++;
                    k++;
                }
                if (k == cbPattern) Railgunhits++;
            }
            else
            {
                // No match here: offsets 1..3 are skipped as long as none of
                // them holds the first pattern byte.
                if (window[1] != first)
                {
                    hop++;
                    if (window[2] != first)
                    {
                        hop++;
                        if (window[3] != first) hop++;
                    }
                }
            }

            pos += hop + 1; // hop < cbPattern, so pos never exceeds cbTarget
        }
        return(NULL);
    }
    else
    {
        unsigned long shift[RAILGUN_BYTE_VALUES];
        unsigned long k;

        /* Preprocessing: bad-character shifts (the last pattern byte is excluded) */
        for (k = 0; k < (unsigned long)RAILGUN_BYTE_VALUES; k++) shift[k] = cbPattern;
        for (k = 0; k < lastIndex; k++) shift[pattern[k]] = lastIndex - k;

        /* Searching */
        pos = 0;
        while (pos <= lastStart)
        {
            const unsigned char tail = text[pos + lastIndex];

            // Cheap probes first; the full compare runs only when the window
            // "looks like" the pattern.
            if (tail == pattern[lastIndex]
             && text[pos] == pattern[0]
             && memcmp(text + pos + 1, pattern + 1, cbPattern - 2) == 0)
                Railgunhits++;

            pos += shift[tail]; // the Horspool shift is applied after a hit as well
        }
        return(NULL);
    }
}
// ### Railgun_Quadruplet_totalhits ]

// Kaze ]

int main(int argc, char * argv[])
{
    fprintf(stdout, "Graffith(graphite), Text decompressor-finder-dumper, r.02++_Graphein, written by Kaze.\n");
    fprintf(stdout, "Graffith is a wrapper over bsc version 2.3.0, written by Ilya Grebnov.\n\n");
//    fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 2.3.0. 9 August 2010.\n");
//    fprintf(stdout, "Copyright (c) 2009-2010 Ilya Grebnov <Ilya.Grebnov@libbsc.com>.\n\n");

#if defined(_OPENMP) && defined(__INTEL_COMPILER)

    kmp_set_warnings_off();

#endif

    if (argc == 2 && *argv[1] == '#') {} else
    ProcessCommandline(argc, argv);
    switch (*argv[1])
    {
        case 'e' : case 'E' : Compression(argv); break; // Kaze
        case 'd' : case 'D' : Decompression(argv); break; // Kaze
        case '#' :
{
// GRAFFITH INIT [
if( ( fp_in1 = fopen( "GRAFFITH.lst", "rb" ) ) == NULL )
{ printf( "GRAFFITH: Can't open file GRAFFITH.lst.\n" ); return( 1 ); }
fseek( fp_in1, 0L, SEEK_END );
size_in1 = ftell( fp_in1 );
fseek( fp_in1, 0L, SEEK_SET );
printf( "Size of GRAFFITH.lst: %lu\n", size_in1 );

if( ( fp_in2 = fopen( "GRAFFITH.ini", "rb" ) ) == NULL )
{ printf( "GRAFFITH: Can't open file GRAFFITH.ini.\n" ); return( 1 ); }
fseek( fp_in2, 0L, SEEK_END );
size_in2 = ftell( fp_in2 );
fseek( fp_in2, 0L, SEEK_SET );
printf( "Size of GRAFFITH.ini: %lu\n", size_in2 );

if( ( fp_outLOG = fopen( "GRAFFITH.log", "wb" ) ) == NULL )
{ printf( "GRAFFITH: Can't open file GRAFFITH.log.\n" ); return( 1 ); }

printf( "Allocating memory %lu MB ... ", (GRFFTH_DE_buffer_MAXSIZE>>20)+0);
GRFFTH_DE_buffer_POINTER = (char *)malloc( GRFFTH_DE_buffer_MAXSIZE );
if( GRFFTH_DE_buffer_POINTER == NULL )
{ puts( "\nGRAFFITH: Needed memory allocation denied!\n" ); return( 1 ); }
printf( "OK\n" );
// GRAFFITH INIT ]

// Find Number Of Files to Proceed: [
        for( k1 = 0; k1 < size_in1; k1++ )
	{
                fread( &workbyte, 1, 1, fp_in1 );
                if( workbyte != 10 )
                { if( workbyte != 13 ) // NON UNIX
                  { if( LINE10len1 < 255 ) { LINE10_1[ LINE10len1 ] = workbyte; }
                    LINE10len1++;
                  }
                  else
                  {
                  }
		}
		else
                { if( 1 <= LINE10len1 && LINE10len1 <= 255 )
                  { LINE10_1[ LINE10len1 ] = 0;
NumberOfFilesToDecompress++;
        LINE10len1 = 0;
LINE10_1[ LINE10len1 ] = 0;
                  }
                }
        } // k1 'for'
fseek( fp_in1, 0L, SEEK_SET );
// Find Number Of Files to Proceed: ]

// MAIN CYCLE =============================================================================================================[
        for( k1 = 0; k1 < size_in1; k1++ )
	{
                fread( &workbyte, 1, 1, fp_in1 );
                if( workbyte != 10 )
                { if( workbyte != 13 ) // NON UNIX
                  { if( LINE10len1 < 255 ) { LINE10_1[ LINE10len1 ] = workbyte; }
                    LINE10len1++;
                  }
                  else
                  {
                  }
		}
		else
                { if( 1 <= LINE10len1 && LINE10len1 <= 255 )
                  { LINE10_1[ LINE10len1 ] = 0;
printf( "Remaining files to decompress: %lu\n", NumberOfFilesToDecompress--);
printf( "Decompressing '%s' ...\n", LINE10_1 );
argv[2] = LINE10_1;
GRFFTH_DE_buffer_SIZE = 0;
Decompression(argv);

// GRAFFITH SEARCH [
LINE10len2 = 0;
fseek( fp_in2, 0L, SEEK_SET );
// SUB-MAIN CYCLE =========================================================================================================[
        for( k2 = 0; k2 < size_in2; k2++ )
	{
                fread( &workbyte, 1, 1, fp_in2 );
                if( workbyte != 10 )
                { if( workbyte != 13 ) // NON UNIX
                  { if( LINE10len2 < 255 ) { LINE10_2[ LINE10len2 ] = workbyte; }
                    LINE10len2++;
                  }
                  else
                  {
                  }
		}
		else
                { if( 1 <= LINE10len2 && LINE10len2 <= 255 )
                  { LINE10_2[ LINE10len2 ] = 0;

// The presence of a wildcard decides type of searching:
PatternLEN = KAZE_strlen(LINE10_2);
    double startTime = BSC_CLOCK();
if ( memchrKAZE(LINE10_2, &TAGfree[0], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[1], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[2], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[3], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[4], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[5], PatternLEN) == NULL && \
     memchrKAZE(LINE10_2, &TAGfree[6], PatternLEN) == NULL )
{
printf( "Entire-file-searching with Railgun_Quadruplet for '%s' ...\n", LINE10_2 );

// #[
// Pattern must be equal to Line!
// The buffer for search looks like:
// Line #1CRLF
// Line #2CRLF
// So first attempt is for 'Line #1CR' must be equal to 'PatternCR'
// So next attempts are for 'LFLine #2CR' must be equal to 'LFPatternCR'
Railgunhits=0;
LINE10_2[ LINE10len2 ] = 13;
// Bastumante do not forget the need for strlen(LINE10_2)>1:
FoundInPTR = Railgun_Quadruplet_totalhits(GRFFTH_DE_buffer_POINTER, LINE10_2, PatternLEN+1, PatternLEN+1); // Pattern+CR
	if ( Railgunhits != 0 ) {
           LINE10_2[ LINE10len2 ] = 0;
           fprintf( fp_outLOG, "%s\r\n", LINE10_2);
        } else {
LINE10len2++;
for( Graphein = 0; Graphein < LINE10len2; Graphein++ ) LINE10_2[ LINE10len2-Graphein ] = LINE10_2[ LINE10len2-1-Graphein ];
LINE10_2[ 0 ] = 10;
FoundInPTR = Railgun_Quadruplet_totalhits(GRFFTH_DE_buffer_POINTER, LINE10_2, GRFFTH_DE_buffer_SIZE, PatternLEN+2); // LF+Pattern+CR
	if ( Railgunhits != 0 ) {
           LINE10_2[ LINE10len2 ] = 0;
           fprintf( fp_outLOG, "%s\r\n", &LINE10_2[1]);
        }
	}
// #]

    TotalSearchRailgunTime = TotalSearchRailgunTime + (BSC_CLOCK() - startTime);
if (TotalSearchRailgunTime == 0) TotalSearchRailgunTime++;
    TotalSearchRailgunSize = TotalSearchRailgunSize + GRFFTH_DE_buffer_SIZE;
    fprintf(stdout, "\r~ Overall Railgun_Quadruplet search performance: %.2f MB/s so far\n", (TotalSearchRailgunSize/1024/1024)/TotalSearchRailgunTime);
TotalRailgunhits = TotalRailgunhits + Railgunhits;
printf( "~ Railgun_Quadruplet current/total hits: %lu/%lu\n", Railgunhits, TotalRailgunhits);
} else
{
printf( "Single-line-searching with Blunderbuss for '%s' ...\n", LINE10_2 );
WILDgunhits = 0;
//Blunderbuss: Alteration of Dutch donderbus: donder, thunder + bus, gun.
WILDgunhits = Blunderbuss(GRFFTH_DE_buffer_POINTER, LINE10_2, GRFFTH_DE_buffer_SIZE, PatternLEN);
    TotalSearchBlunderbussTime = TotalSearchBlunderbussTime + (BSC_CLOCK() - startTime);
if (TotalSearchBlunderbussTime == 0) TotalSearchBlunderbussTime++;
    TotalSearchBlunderbussSize = TotalSearchBlunderbussSize + GRFFTH_DE_buffer_SIZE;
    fprintf(stdout, "\r~ Overall Blunderbuss search performance: %.2f MB/s so far\n", (TotalSearchBlunderbussSize/1024/1024)/TotalSearchBlunderbussTime);
TotalWILDgunhits = TotalWILDgunhits + WILDgunhits;
printf( "~ Blunderbuss current/total hits: %lu/%lu\n", WILDgunhits, TotalWILDgunhits);
}
fflush(fp_outLOG); // Not sure: CTRL+C doesn't flush?!

        LINE10len2 = 0;
LINE10_2[ LINE10len2 ] = 0;
                  }
                }
        } // k2 'for'
// SUB-MAIN CYCLE =========================================================================================================]
// GRAFFITH SEARCH ]

        LINE10len1 = 0;
LINE10_1[ LINE10len1 ] = 0;
                  }
                }
        } // k1 'for'
// MAIN CYCLE =============================================================================================================]
printf( "\nGRAFFITH: Total Blunderbuss(wildcard) hits: %lu\n", WildGLOBALhits);
printf( "GRAFFITH: Done.\n");
break;
}
        default  : ShowUsage();
    }

    return 0;
}

/*-----------------------------------------------------------*/
/* End                                               bsc.cpp */
/*-----------------------------------------------------------*/
