// This is source of Leprechaun revision 16FIX: Leprechaun_x-leton.c, copyleft Sanmayce, 2012-Dec-16.
// How embarrassing! A stupid bug was fixed, namely one missed 'if ( REUSE == 0 ) {}' holding the TRAVERSE segment - this segment nullifies LEAF addresses thus making W/w unable to retraverse.

// This is source of Leprechaun revision 16: Leprechaun_x-leton.c, copyleft Sanmayce, 2012-Dec-13.
// The new feature is the ability to reuse the external hash-tree structure.
// The option is W/w similar to Z/z. This way the latency i.e. the response time is <1s.

// This is source of Leprechaun revision 15FIXFIX+: Leprechaun_x-leton.c, copyleft Sanmayce, 2012-Dec-11.
// The new feature is the ability to command Leprechaun (from inside the list file with 2 metacommands) to enter/exit INSERT mode.
// This allows controlling whether x-grams that are new to the current hash-tree structure are to be counted [and] INSERTed.

// Usage:
// E:\_Gamera_r15_12348>type ON.txt
// Leprechaun says x-gram inserting disabled for next files: ON
// 
// E:\_Gamera_r15_12348>type OFF.txt
// Leprechaun says x-gram inserting disabled for next files: OFF
// 
// E:\_Gamera_r15_12348>dir Your_textual_folders/b/s/a-d>go.lst
// E:\_Gamera_r15_12348>copy go.lst+on.txt+_Gamera.tar.3.sorted.4andabove.lst MetaLep.lst /b
// E:\_Gamera_r15_12348>type MetaLep.lst
// E:\_Gamera_r15_12348\Your_textual_folders\Example.txt
// Leprechaun says x-gram inserting disabled for next files: ON
// _Gamera.tar.3.sorted.4andabove.txt
// 
// E:\_Gamera_r15_12348>Leprechaun_x-leton_META_32bit_03_01p.exe MetaLep.lst MetaLep.3.wrd 1234567 Y

// All lines new to r15FIXFIX are with commented part //15FIXFIX+

/*
This is source of Leprechaun revision 15FIXFIX: Leprechaun_x-leton.c, copyleft Sanmayce, 2011-Dec-14. 2011-Mar-07: Fixed a small command line parsing bug.

The 15FIXFIX differs from 15fix with:
[a bug fixed(REALLY FIXED!): Fixed a nasty bug causing very restrictive way of forming x-grams.]
The 15fix differs from 15 with:
[a bug fixed: division by zero when finishing-starting time is under 1 second
Fixed a nasty bug causing very restrictive way of forming x-grams.]
The 15 differs from 14+++++FIXFIX with:
[
Only some more stats at the end.
]
The 14+++++FIXFIX differs from 14++++FIX with:
[
Bugs in LOG stats in r.14++++FIX:
Not nullified variables during passes - must be nullified.
Number Of Trees(GREATER THE BETTER): 195,939
Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: 654,428
Total Attempts to Find/Put WORDs into B-trees order 3: 39,042,828
Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): 3
]
The 14++++FIX differs from 14+++ with:
[
1)Fixed the occurrences bug caused by not NULLifying the field housing the occurrences, a nasty thing: all the revisions 14??? were buggy, how stupid on my part, grrrr.
2)Ability to rip in passes:
#define HashChunkSizeInBITS 26 // Defines the number of passes. Should be smaller than or equal to HashInBITS. If HashInBITS == HashChunkSizeInBITS then 2^(HashInBITS-HashChunkSizeInBITS)=2^0=1 pass(es).
]
The 14+++ differs from 14++ with:
[
//Only one must be uncommented:
//#define singleton
//#define doubleton
//#define tripleton
#define quadrupleton
//#define quintupleton
//#define sextupleton
//#define septupleton
//#define octupleton
//#define nonupleton
//#define decupleton
1 	One 	single,    singlet,    singleton
2 	Two 	double,    doublet,    doubleton
3 	Three 	triple,    triplet,    tripleton
4 	Four 	quadruple, quadruplet, quadrupleton
5 	Five 	quintuple, quintuplet, quintupleton
6 	Six 	sextuple,  sextuplet,  sextupleton
7 	Seven 	septuple,  septuplet,  septupleton
8 	Eight 	octuple,   octuplet,   octupleton
9       Nine    nonuple,   nonuplet,   nonupleton
10      Ten,    decuple,   decuplet,   decupleton
1 	One 	ace, single, singleton, unary, unit, unity
2 	Two 	binary, brace, couple, couplet, distich, deuce, double, doubleton, duad, duality, duet, duo, dyad, pair, snake eyes, span, twain, twosome, yoke
3 	Three 	deuce-ace, leash, set, tercet, ternary, ternion, terzetto, threesome, tierce, trey, triad, trine, trinity, trio, triplet, troika, hat-trick
4 	Four 	foursome, quadruplet, quatern, quaternary, quaternion, quaternity, quartet, tetrad
5 	Five 	cinque, fin, fivesome, pentad, quint, quintet, quintuplet
6 	Six 	half dozen, hexad, sestet, sextet, sextuplet, sise
7 	Seven 	heptad, septet, septuplet
8 	Eight 	octad, octave, octet, octonary, octuplet, ogdoad
Also, in addition to 'Y' and 'Z', 'y' and 'z' were added in order to be able to dump only n-grams without occurrences.
]
A lazy approach is applied in order to add occurrences of each 4-gram:
- just reserve the last 4bytes in 'wrd' for counter as follows:
  'LongestLineInclusive' has to be greater than 31 (51 looks good enough) in order not to miss longer 4-grams like: encourage_innovative_approaches_to
  char FourGram[LongestLineInclusive+1+4]; // 31bytes longest 4-gram + 1byte NULL + 4bytes COUNTER
- the laziness lies here:
  no need to make the four bytes to house the value 1 when a new 'wrd' is being inserted (either in step 1 or step 3) just add 1 at final traverse dump,
  in step 1 when a 'wrd' is found then add 1 to the counter only if it is not 9,999,999 already (limitation enforced on counter).
- when dumping the format has to be: 
  0,000,001\ta_b_c_d
  in order to sort the whole lines later with external Qsort and have easy screening for rare/wrong/useless 4-grams.

Comment/Uncomment accordingly in order to compile: 
//#define _WIN32_ENVIRONMENT_
#define _POSIX_ENVIRONMENT_

Windows compile(uncomment #include <io.h> line, ignore warnings):
For Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.3077 for 80x86 use:
cl /Ox /Wp64 /TcLeprechaun.c /FaLeprechaun

Windows compile(comment #include <io.h> line, ignore warnings):
For Intel(R) C++ Compiler Professional for applications running on IA-32, Version 11.1 use:
icl /Ox /Wp64 /TcLeprechaun.c /FaLeprechaun /w /QxHOST

Linux compile(ignore warnings):
gcc -D_FILE_OFFSET_BITS=64 -m64 -static -O3 -mtune=generic Leprechaun_quadrupleton.c -o Leprechaun_quadrupleton_r14_generic_64bits.elf

!!! For some reason a nasty bug (some UFO/wrong occurrences before phrases in the resultant file) occurs when 32bit (supposedly the opposite of the expected) code is generated:
gcc -D_FILE_OFFSET_BITS=64 -m32 -static -O3 -mtune=generic Leprechaun_quadrupleton.c -o Leprechaun_quadrupleton_r14_generic_32bits.elf

[It's a little weird(Intel boosts the sort while falls behind in parsing, tested on T3400):]

Leprechaun_r13_7pluses_Microsoft_32-bit_16.00.30319.01.exe _vs_ Wikipedia_22,202,980_LATIN-Words:
Words per second performance: 1,679,585W/s
Time for making unsorted wordlist: 30 second(s)
Time for sorting unsorted wordlist: 25 second(s)

Leprechaun_r13_7pluses_Intel_IA-32_11.1.exe _vs_ Wikipedia_22,202,980_LATIN-Words:
Words per second performance: 1,603,240W/s
Time for making unsorted wordlist: 31 second(s)
Time for sorting unsorted wordlist: 19 second(s)

Due to my ignorance(calderas in my C knowledge): 64bit code cannot be generated, for now.
Any improvement is welcome.
Enjoy!
*/

// C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_r13++++++_C_EXE>cl /Ox /Wp64 /TcLeprechaun.c /FaLeprechaun
// Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.3077 for 80x86
// Copyright (C) Microsoft Corporation 1984-2002. All rights reserved.
// 
// Leprechaun.c
// Leprechaun.c(829) : warning C4312: 'type cast' : conversion from 'int' to 'string' of greater size
// Leprechaun.c(849) : warning C4312: 'type cast' : conversion from 'int' to 'string * ' of greater size
// Leprechaun.c(2048) : warning C4312: 'type cast' : conversion from 'int' to 'char *' of greater size
// Leprechaun.c(2063) : warning C4311: 'type cast' : pointer truncation from 'char *' to 'unsigned long'
// Leprechaun.c(2068) : warning C4311: 'type cast' : pointer truncation from 'char *' to 'unsigned long'
// Leprechaun.c(2371) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2570) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2626) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2657) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2663) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2668) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2696) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2729) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2743) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Leprechaun.c(2755) : warning C4312: 'type cast' : conversion from 'unsigned long' to 'char *' of greater size
// Microsoft (R) Incremental Linker Version 7.10.3077
// Copyright (C) Microsoft Corporation.  All rights reserved.
//
// /out:Leprechaun.exe
// Leprechaun.obj
//
// C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_r13++++++_C_EXE>

/*
Below is the gain in 13++ and 13+++:

Words per second performance: 5,974,513W/s
Word count: 4,582,451,898 of them 9,177,221 distinct
Number Of Trees(GREATER THE BETTER): 2855919
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 6321302

Words per second performance: 6,329,353W/s
Word count: 4,582,451,898 of them 9,177,221 distinct
Number Of Trees(GREATER THE BETTER): 2958681
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 6218540

The aftermath: 6,321,302 - 6,218,540 = 102,762 less collisions while the speed of hash is not slower for sure - I call this: double trouble avoidance.
Thanks to Fowler/Noll/Vo hash inventors.
*/

/*
Let's see the supplementary-clash on Intel Pentium T3400 Merom-1M 2166MHz:
Binary-Search-Trees vs B-Trees of order 3

C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_step_1_PAIR-QUEST>Leprechaun_Microsoft.exe Leprechaun_vs_Wikipedia_en-WORDS.lst Leprechaun_vs_Wikipedia_en-WORDS.wrd 4777 x
Leprechaun(Fast Greedy Word-Ripper), revision 13++++++, written by Svalqyatchx.
Leprechaun: 'Oh, well, didn't you hear? Bigger is good, but jumbo is dear.'
Kaze: Let's see what a 3-way hash + 6,602,752 Binary-Search-Trees can give us,
      also the performance of a 3-way hash + 6,602,752 B-Trees of order 3.
Size of input file with files for Leprechauning: 27
Allocating memory 1863MB ... OK
Size of Input TEXTual file: 146,973,879
\; Word count: 12,561,874 of them 12,561,874 distinct; Done: 64/64
Bytes per second performance: 14,697,387B/s
Words per second performance: 1,256,187W/s
Flushing unsorted words ...
Time for making unsorted wordlist: 15 second(s)
Deallocated memory in MB: 1863
Allocated memory for words in MB: 141
Allocated memory for pointers-to-words in MB: 48
Sorting(with 'MultiKeyQuickSortX26Sort' by J. Bentley and R. Sedgewick) ...
Sort pass 26/26 ...
Flushing sorted words ...
Time for sorting unsorted wordlist: 14 second(s)
Leprechaun: Done.

[An excerpt of Leprechaun.LOG:]
Number Of Trees(GREATER THE BETTER): 2786806
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 58,935,172
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 2,786,806

C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_step_1_PAIR-QUEST>Leprechaun_Microsoft.exe Leprechaun_vs_Wikipedia_en-WORDS.lst Leprechaun_vs_Wikipedia_en-WORDS.wrd 4777 y
Leprechaun(Fast Greedy Word-Ripper), revision 13++++++, written by Svalqyatchx.
Leprechaun: 'Oh, well, didn't you hear? Bigger is good, but jumbo is dear.'
Kaze: Let's see what a 3-way hash + 6,602,752 Binary-Search-Trees can give us,
      also the performance of a 3-way hash + 6,602,752 B-Trees of order 3.
Size of input file with files for Leprechauning: 27
Allocating memory 1863MB ... OK
Size of Input TEXTual file: 146,973,879
\; Word count: 12,561,874 of them 12,561,874 distinct; Done: 64/64
Bytes per second performance: 24,495,646B/s
Words per second performance: 2,093,645W/s
Flushing unsorted words ...
Time for making unsorted wordlist: 12 second(s)
Deallocated memory in MB: 1863
Allocated memory for words in MB: 141
Allocated memory for pointers-to-words in MB: 48
Sorting(with 'MultiKeyQuickSortX26Sort' by J. Bentley and R. Sedgewick) ...
Sort pass 26/26 ...
Flushing sorted words ...
Time for sorting unsorted wordlist: 14 second(s)
Leprechaun: Done.

[An excerpt of Leprechaun.LOG:]
Number Of Trees(GREATER THE BETTER): 2786806
Total Attempts to Find/Put WORDs into B-trees order 3: 18,534,910

C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_step_1_PAIR-QUEST>type Leprechaun_vs_Wikipedia_en-WORDS.lst
wikipedia-en-html.tar.wrd

C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_step_1_PAIR-QUEST>dir Leprechaun_vs_Wikipedia_en-WORDS.*
 Volume in drive C is H320_Vol2
 Volume Serial Number is A094-FAE2

 Directory of C:\WorkTemp\Leprechaun_r13++\Visual C++ Toolkit 2003\Leprechaun_step_1_PAIR-QUEST

09/14/2010  06:04 AM                27 Leprechaun_vs_Wikipedia_en-WORDS.lst
09/15/2010  02:51 AM       146,973,879 Leprechaun_vs_Wikipedia_en-WORDS.wrd
               2 File(s)    146,973,906 bytes
               0 Dir(s)     965,787,648 bytes free

Conclusion:
18,534,910/12,561,874=1.475 Average Attempts to Find/Put WORDs into B-trees order 3, not bad at all.
*/

// To do: must learn how to align, at last.
/*
Matt Mahoney ZPAQ fragment:
  T *data;  // allocated memory
  int offset;
  ...
  offset=64-int((long)data&63);
  data=(T*)((char*)data+offset);  // adjust to 64 byte boundary

quicklz.c fragment:
#define QLZ_ALIGNMENT_PADD 8
unsigned char *scratch_aligned = (unsigned char *)scratch_compress + QLZ_ALIGNMENT_PADD - (((size_t)scratch_compress) % QLZ_ALIGNMENT_PADD);
size_t *buffersize = (size_t *)scratch_aligned;

minilzo.c fragment:
#define lzo_uintptr_t       unsigned long
#define PTR(a)              ((lzo_uintptr_t) (a))
#define PTR_LINEAR(a)       PTR(a)
#define PTR_ALIGNED_4(a)    ((PTR_LINEAR(a) & 3) == 0)
*/

//__declspec(align(64)) int BigArray[1024]; // Windows syntax
//or
//int BigArray[1024] __attribute__((aligned(64))); // Linux syntax

// Alignment stub followed by the fixed-name integer typedefs used by the hashing code.
// NOTE(review): in the part of the file visible above this point _WIN32_ENVIRONMENT_ is not
// defined yet (its #define appears further down, in the "MUST work both for Windows and Linux"
// section), so this test is false here unless the macro is supplied on the compiler command line.
#if defined(_WIN32_ENVIRONMENT_)
// NOTE(review): this specifier has no declaration of its own; if the branch were active it would
// prefix the next declaration in the file, i.e. the WORD typedef below - confirm that is intended.
__declspec(align(64))
#else
// GCC-style counterpart, left commented out by the author (see the "To do: must learn how to align" note above).
//__attribute__((aligned(64)));
#endif /* defined(_WIN32_ENVIRONMENT_)  */

// WORD: alias for unsigned short (presumably 16 bits on the targeted compilers - the C standard only guarantees at least 16).
typedef unsigned short WORD; // As for 'With *(DWORD*), a buffer overrun is possible at the end of a memory page.' I knew about it but was fooled by assembly code generated by VS2010 which translates it to a word access:
//; 792 : hash32 = FNV_32A_OP32(hash32, *(UINT*)p&0xFFFF);

// UINT and DWORD are both aliases for unsigned int (presumably 32 bits on the targeted compilers - TODO confirm for every build target).
typedef unsigned int UINT;
typedef unsigned int DWORD;

/*
Enter-the-BESTer or an alchemical clash of pairs of primes.

When an x-bit hash where x < 16 and is not a power of 2 is needed,
here comes 'FNV1A_Hash_4_OCTETS': a slightly tuned FNV1A hash for a huge(22,202,980) wordlist of latin-letters-words.

Two improvements for the generic(base) FNV1A hash:
- first, better speed: by reducing 'imul' instructions when string is 4++ chars
- second, better dispersion: by experimenting(superficially-lite test done, so far) with 'FNV1_32_PRIME'

Or more concretely:
- For FNV1_32_INIT = 2166136261
- Giving to 'FNV1_32_PRIME' all primes between 2 and 11987
- Shifting by 16bits instead of 13bits, when 8192 slots are used

C code:
typedef unsigned char u_int8_t;
typedef unsigned long u_int32_t;

#define FNV1_32_INIT ((u_int32_t)2166136261)
#define FNV1_32_PRIME ((u_int32_t)1607)

#define FNV_32A_OP(hash, octet) \
    (((u_int32_t)(hash) ^ (u_int8_t)(octet)) * FNV1_32_PRIME)

#define FNV_32A_OP32(hash, octet) \
    (((u_int32_t)(hash) ^ (u_int32_t)(octet)) * FNV1_32_PRIME)

0800 // Invoking: FNV1A_Hash_4_OCTETS(wrd, wrdlen>>2) // = 0,1,2,3,4,5,6,7 [1..31]
0801 int FNV1A_Hash_4_OCTETS(char *str, int wrdlen_QUADRUPLETS)
0802 { 
0803 u_int32_t hash;
0804 char *p;
0805 
0806 hash = FNV1_32_INIT;
0807 p=str;
0808 
0809 // The goal of stage #1: to reduce number of 'imul's.
0810 
0811 // Stage #1:
0812 for (; wrdlen_QUADRUPLETS != 0; --wrdlen_QUADRUPLETS) {
0813     hash = FNV_32A_OP32(hash, (unsigned long)*(long *)p); // mov edi, DWORD PTR [eax]
0814     p=p+4; // add eax, 4
0815 }
0816 
0817 // Stage #2:
0818 for (; *p; ++p) {
0819     hash = FNV_32A_OP(hash, *p); // mov dl, BYTE PTR [eax]
0820 }
0821 
0822   //return ((hash>>13) ^ hash) & 8191; // (((u_int32_t)1<<(x))-1) where x=13
0823   return ((hash>>16) ^ hash) & 8191; // 00..8191 i.e. 2^13=8192
0824 }

Assembler code:
_FNV1A_Hash_4_OCTETS PROC NEAR
; Line 812
	mov	edx, DWORD PTR _wrdlen_QUADRUPLETS$[esp-4]
	test	edx, edx
	mov	eax, DWORD PTR _str$[esp-4]
	push	esi
	mov	esi, DWORD PTR _FNV1_32_PRIME
	mov	ecx, -2128831035
	je	SHORT $L1612
	push	edi
	npad	7
$L1610:
; Line 813
	mov	edi, DWORD PTR [eax]
	xor	edi, ecx
	imul	edi, esi
; Line 814
	add	eax, 4
	dec	edx
	mov	ecx, edi
	jne	SHORT $L1610
	pop	edi
$L1612:
; Line 818
	mov	dl, BYTE PTR [eax]
	test	dl, dl
	je	SHORT $L1619
$L1617:
; Line 819
	movzx	edx, dl
	xor	edx, ecx
	imul	edx, esi
	inc	eax
	mov	ecx, edx
	mov	dl, BYTE PTR [eax]
	test	dl, dl
	jne	SHORT $L1617
$L1619:
; Line 823
	mov	eax, ecx
	shr	eax, 16
	xor	eax, ecx
	and	eax, 8191
	pop	esi
; Line 824
	ret	0
_FNV1A_Hash_4_OCTETS ENDP


So, 'FNV1A_Hash_4_OCTETS' calculates faster and gives better distribution(3549448 for 1607), which is 0.6% better(less collisions), than generic 'FNV1A_Hash' with 3527916.

FNV proves to be great, dealing with 4x8bits(four octets) at once doesn't hurt distribution at all, I was amazed by consistency(stable behaviour) of 'FNV1A_Hash_4_OCTETS'.

I want to make a total clash of all possible pairs 'FNV1_32_INIT' & 'FNV1_32_PRIME' in order to lessen even a few thousand collisions. 
This is critical for speed performance e.g. when 30,974,750,142 words, the case of wikipedia-en-html.tar, must be hashed.
The current obstacle is needed-time: each filling (26 slots x 31 sub-slots x 8192 sub-sub-slots) executes in 32-36 seconds for each pair.
Such an easy task, but I can't see how to get done, it is not hard but slow even with 15 times faster testbed.

Between 1..1166136247 there are 58,834,113 primes (inclusive).
Between 1..16777619 there are 1,077,891 primes (inclusive).
Or 58834113*1077891 = 63,416,760,895,683 pairs or 2,010,932 years needed at one-pair-per-second rate.

Finding THE best pair in my opinion is a total alchemy, due to the very nature of hashing: which is mainly alchemical and partly scientific.
Since the magnum corpus of words is static-enough, THE pair is worthy to be found.

It doesn't take a think-tank to see the superiority of FNV, Fowler/Noll/Vo did reveal a thing of beauty.

Performance of 'FNV1A_Hash_4_OCTETS': 10236 words/clock or 105 MB/s|3,549,448 used slots (best)

CASE #1: with 'if (strlen(backup[j]) != 0)' before each execution
Performance of 'KuxHash3plus' aka '2in1': 8076 words/clock or 82 MB/s|3,410,463 used slots (worst)
Performance of 'FNV1A_Hash': 8079 words/clock or 83 MB/s|3,527,916 used slots
Performance of 'FNV1A_Hash_SHIFTless_XORless': 8109 words/clock or 83 MB/s|3,540,323 used slots

CASE #2: without 'if (strlen(backup[j]) != 0)' before each execution
Performance of 'KuxHash3plus' aka '2in1': 11673 words/clock or 119 MB/s|3,410,463 used slots (worst)
Performance of 'FNV1A_Hash': 11558 words/clock or 118 MB/s|3,527,916 used slots
Performance of 'FNV1A_Hash_SHIFTless_XORless': 11570 words/clock or 118 MB/s|3,540,323 used slots

Note: 
The 'strlen' overhead(CASE #1) is necessary due to priorly(before hash invocation) needed len-of-string for 'FNV1A_Hash_4_OCTETS'.
Almost always, that is the case, since parsing of incoming text must know length of words/lines/files.
In case of not knowing this length: ((119-105)/105)*100% = 13% degradation is unacceptable.
The 'strlen' is an awful brake.
Also whether the code overhead(one additional cycle) of 'FNV1A_Hash_4_OCTETS' is so successful(as a trade-off) or the testbed is deceiving I do not know, here I am not so sure regardless of notorious delays caused by 'imul' and 'div' instructions.
*/

/*
FNV1_32_PRIME: //?: 16777619

Above Binary-Search-Tree with MaxPEAK = 61 has NODEs = 61 and LEAFs = 1
Words per second performance: 1,046,822W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2775839

Above Binary-Search-Tree with MaxPEAK = 39 has NODEs = 72 and LEAFs = 15
Words per second performance: 1,356,588W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3539690

FNV1_32_PRIME: //3549448: 1607

Above Binary-Search-Tree with MaxPEAK = 61 has NODEs = 61 and LEAFs = 1
Words per second performance: 1,046,822W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2783970

Above Binary-Search-Tree with MaxPEAK = 38 has NODEs = 50 and LEAFs = 11
Words per second performance: 1,410,851W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3549395

FNV1_32_PRIME: //3550132: 175757909

Above Binary-Search-Tree with MaxPEAK = 60 has NODEs = 60 and LEAFs = 1
Words per second performance: 966,298W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2784479

Above Binary-Search-Tree with MaxPEAK = 39 has NODEs = 64 and LEAFs = 12
Words per second performance: 1,410,851W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3550115

FNV1_32_PRIME: //3550687: 201887489

Above Binary-Search-Tree with MaxPEAK = 60 has NODEs = 60 and LEAFs = 1
Words per second performance: 966,298W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2784377

Above Binary-Search-Tree with MaxPEAK = 40 has NODEs = 55 and LEAFs = 11
Words per second performance: 1,356,588W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3550528

FNV1_32_PRIME: //3550733: 172783361

Above Binary-Search-Tree with MaxPEAK = 59 has NODEs = 59 and LEAFs = 1
Words per second performance: 1,046,822W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786362

Above Binary-Search-Tree with MaxPEAK = 38 has NODEs = 70 and LEAFs = 17
Words per second performance: 1,410,851W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3550746

FNV1_32_PRIME: //3550929: 204312319

Above Binary-Search-Tree with MaxPEAK = 61 has NODEs = 61 and LEAFs = 1
Words per second performance: 966,298W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2785581

Above Binary-Search-Tree with MaxPEAK = 37 has NODEs = 55 and LEAFs = 12
Words per second performance: 1,356,588W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3550886

Leprechaun_Microsoft.exe: FNV1_32_PRIME: //3551736: 107712257

Above Binary-Search-Tree with MaxPEAK = 61 has NODEs = 61 and LEAFs = 1
Words per second performance: 1,046,822W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786515

Above Binary-Search-Tree with MaxPEAK = 36 has NODEs = 64 and LEAFs = 15
Words per second performance: 1,356,588W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3551744

Leprechaun_Intel.exe: FNV1_32_PRIME: //3551736: 107712257

Above Binary-Search-Tree with MaxPEAK = 61 has NODEs = 61 and LEAFs = 1
Words per second performance: 1,256,187W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786515

Above Binary-Search-Tree with MaxPEAK = 36 has NODEs = 64 and LEAFs = 15
Words per second performance: 1,603,240W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3551744

Wow: 1,603,240W/s vs 1,356,588W/s respectively Leprechaun_Intel.exe vs Leprechaun_Microsoft.exe, i.e. 18% betterment, no joke!

Alchemical search for best PRIME-PAIR revision uses next line:
Slot = FNV1A_Hash_4_OCTETS(wrd, wrdlen>>2)<<2; //13++++
This revision uses next lines:
if (wrdlen<=19) // 4x4+3=19 i.e. last contains 7 clashes
              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>2, 2)<<2; //13+++++
else            // 2x8+4=20 i.e. first contains 6 clashes
              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>3, 3)<<2; //13+++++

! An expected but unpleasant degradation for 3551961: 428904191 compared to 3551736: 107712257, this shows 'FNV1A_Hash_4_OCTETS' has only figurative purpose - the 4 lines of 'FNV1A_Hash_Granularity' decide the last usefulness.

Leprechaun.exe: FNV1_32_PRIME: //3551961: 428904191

Above Binary-Search-Tree with MaxPEAK = 60 has NODEs = 60 and LEAFs = 1
Words per second performance: 966,298W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_en-WORDS.lst
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786383

Above Binary-Search-Tree with MaxPEAK = 39 has NODEs = 71 and LEAFs = 16
Words per second performance: 1,410,851W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3551503

Leprechaun.exe: FNV1_32_PRIME: //3552103: 588411137

Above Binary-Search-Tree with MaxPEAK = 6 has NODEs = 6 and LEAFs = 1
Size of all TEXTual Files: 4,067,439
Word count: 358,798 of them 351,116 distinct
Number Of Trees(GREATER THE BETTER): 310622
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 310,622

Above Binary-Search-Tree with MaxPEAK = 60 has NODEs = 60 and LEAFs = 1
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786485
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 2,786,485

Above Binary-Search-Tree with MaxPEAK = 39 has NODEs = 62 and LEAFs = 15
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3551956
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,072,131

Leprechaun.exe: FNV1_32_PRIME: //3552039: 602173697 !!!GOODEST so far!!!

Above Binary-Search-Tree with MaxPEAK = 6 has NODEs = 6 and LEAFs = 1
Size of all TEXTual Files: 4,067,439
Word count: 358,798 of them 351,116 distinct
Number Of Trees(GREATER THE BETTER): 310948
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 310,948

Above Binary-Search-Tree with MaxPEAK = 63 has NODEs = 63 and LEAFs = 1
Size of all TEXTual Files: 146,973,879
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2786806
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 2,786,806

Above Binary-Search-Tree with MaxPEAK = 36 has NODEs = 52 and LEAFs = 9
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3552296
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,072,899

Between 1 and 602392027 at step 100 following FNV1_32_PRIMEs(for FNV1_32_INIT=2166136261) give(FNV1A_Hash_4_OCTETS) dispersion:
3550022: 423779327
3550028: 513793537
3550053: 434840321
3550067: 437062229
3550080: 420344321
3550090: 304777471
3550097: 496547839
3550129: 390809599
3550132: 175757909
3550163: 353712127
3550231: 334434817
3550237: 272789761
3550247: 590341121
3550255: 358814207
3550277: 437182721
3550326: 521795327
3550347: 311867393
3550447: 456137729
3550458: 418208767
3550516: 602048767
3550525: 513597697
3550526: 347283199
3550528: 598773503
3550592: 598139137
3550598: 242448127
3550611: 571481087
3550628: 457012993
3550664: 482822143
3550666: 249098753
3550687: 201887489
3550702: 489976063
3550710: 272961023
3550733: 172783361
3550734: 431562497
3550929: 204312319
3550984: 562853633
3550991: 551362303
3551359: 332820737
3551484: 354126079
3551514: 407138561
3551523: 442058753
3551701: 449230849
3551736: 107712257
3551961: 428904191
3552039: 602173697
3552103: 588411137
*/

// Windows: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//  _CRTIMP size_t __cdecl fread(void *, size_t, size_t, FILE *);
//  _CRTIMP size_t __cdecl fwrite(const void *, size_t, size_t, FILE *);
//  _CRTIMP int __cdecl fgetpos(FILE *, fpos_t *);
//  _CRTIMP int __cdecl fsetpos(FILE *, const fpos_t *);

//  _CRTIMP __int64 __cdecl _lseeki64(int, __int64, int);
//  _CRTIMP __int64 __cdecl _telli64(int);
//  _CRTIMP __int64 __cdecl _filelengthi64(int);
//  above 3 are in 'io.h'

//  _CRTIMP int __cdecl fseek(FILE *, long, int);
//  _CRTIMP long __cdecl ftell(FILE *);
//  _CRTIMP int __cdecl fclose(FILE *);

//  #ifndef _SIZE_T_DEFINED
//  #ifdef  _WIN64
//  typedef unsigned __int64    size_t;
//  #else
//  typedef _W64 unsigned int   size_t;
//  #endif
//  #define _SIZE_T_DEFINED
//  #endif

//  typedef __int64 fpos_t;

// Linux: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//  size_t fread (void *data, size_t size, size_t count, FILE *stream)
//  size_t fwrite (const void *data, size_t size, size_t count, FILE *stream)
//  int fgetpos (FILE *stream, fpos_t *position)
//  int fsetpos (FILE *stream, const fpos_t *position)

//  FILE * fopen64 (const char *filename, const char *opentype)
//  int fseeko64 (FILE *stream, off64_t offset, int whence)  
//  off64_t ftello64 (FILE *stream)  
//  int fclose (FILE *stream)  

//  off_t lseek (int filedes, off_t offset, int whence)
//  above 1 is in 'unistd.h'


// ============== MUST work both for Windows and Linux ==============
// Build-platform selector.
// NOTE(review): this #define comes after the '#if defined(_WIN32_ENVIRONMENT_)' test placed in front
// of the WORD/UINT/DWORD typedefs earlier in this file, so that earlier test does not see it;
// only preprocessor tests located below this line are affected by the choice made here.
// Presumably it selects the platform-specific file I/O code further down (not visible from here) - verify.
//Only one must be uncommented:
#define _WIN32_ENVIRONMENT_
//#define _POSIX_ENVIRONMENT_

// N-gram order selector.
// Exactly one of the ten selector macros below must be left uncommented.
// The chosen selector is translated into the numeric macro _ngram_:
// singleton -> 1, doubleton -> 2, ... , decupleton -> 10.
// Selecting more than one yields conflicting redefinitions of _ngram_, which the compiler reports.
//Only one must be uncommented:
//#define singleton
//#define doubleton
//#define tripleton
//#define quadrupleton
#define quintupleton
//#define sextupleton
//#define septupleton
//#define octupleton
//#define nonupleton
//#define decupleton

#ifdef singleton
#define _ngram_ 1
#endif
#ifdef doubleton
#define _ngram_ 2
#endif
#ifdef tripleton
#define _ngram_ 3
#endif
#ifdef quadrupleton
#define _ngram_ 4
#endif
#ifdef quintupleton
#define _ngram_ 5
#endif
#ifdef sextupleton
#define _ngram_ 6
#endif
#ifdef septupleton
#define _ngram_ 7
#endif
#ifdef octupleton
#define _ngram_ 8
#endif
#ifdef nonupleton
#define _ngram_ 9
#endif
#ifdef decupleton
#define _ngram_ 10
#endif

// Fail at this point, with a readable message, when every selector above was left commented out;
// otherwise _ngram_ would stay undefined and the mistake would only surface at its first use.
#ifndef _ngram_
#error "No n-gram order selected: uncomment exactly one of singleton .. decupleton"
#endif

// Fallback definition of NULL, used only when nothing has defined it yet:
// plain 0 when compiled as C++, a void pointer constant when compiled as C.
#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void*)0)
#endif
#endif

// HashInBITS: log2 of the number of hash slots. The author's default is 26, i.e. 2^26 = 64MS(Mega Slots);
// with 8-byte slots that is 512MB, chosen because many netbooks have 512MB free (1GB in total).
// The value currently set below is 24, i.e. 2^24 = 16M slots (128MB at 8 bytes per slot).
#define HashInBITS 24
// HashChunkSizeInBITS: defines the number of passes, 2^(HashInBITS-HashChunkSizeInBITS).
// If HashInBITS == HashChunkSizeInBITS then 2^0 = 1 pass.
#define HashChunkSizeInBITS 24
// The pass count above is only meaningful for a non-negative exponent, so reject bad settings early.
#if HashChunkSizeInBITS > HashInBITS
#error "Leprechaun: HashChunkSizeInBITS must be smaller or equal to HashInBITS"
#endif
/*
Tests done on super-speed-ramdisk 1800MB:
Leprechaun_quadrupleton rev. 14+ in fact differs from r.14 only in the optimized (LEAFwise) fragments 1] and 2]. Fragment 3] and the dump are not yet optimized. The goal is to track how this partial tweak affects the 64KS(Kilo Slots) i.e. 512KB hash variant, which is 1000 times smaller.

[Variant (HashInBITS 26 - 0) with 512MB hash:]

Leprechaun_quadrupleton (Fast Greedy Phrase-Ripper), rev. 14+, written by Svalqyatchx.
Purpose: Rips all distinct 4-grams (4-word phrases) with length 12..51 chars from incoming texts.
Feature1: In this revision 512MB 1-way hash is used which results in 67,108,864 external B-Trees of order 3.
Feature2: The bottleneck is seek-time, if the external memory has latency 100+microseconds then look further.
Size of input file with files for Leprechauning: 19
Allocating HASH memory 536,870,977 bytes ... OK
Allocating/ZEROing 1,292,478,478 bytes swap file ... OK
Size of Input TEXTual file: 206,908,949
|; 0,065,139P/s; Phrase count: 18,760,213 of them 10,165,640 distinct; Done: 64/64
Bytes per second performance: 718,433B/s
Phrases per second performance: 65,139P/s
Time for putting phrases into trees: 288 second(s)
Flushing UNsorted phrases: 100%; Shaking trees performance: 0,014,439P/s
Time for shaking phrases from trees: 704 second(s)
	Dump LEAFwise also [
	Bytes per second performance: 736,330B/s
	Phrases per second performance: 66,762P/s
	Time for putting phrases into trees: 281 second(s)
	Flushing UNsorted phrases: 100%; Shaking trees performance: 0,023,807P/s
	Time for shaking phrases from trees: 427 second(s)
	Dump LEAFwise also ]
Leprechaun: Done.

Leprechaun report:
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 731,746
Number Of Trees(GREATER THE BETTER): 9,433,894
Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: 69,623
Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): 1
Used value for third parameter in KB: 1,262,186
Use next time as third parameter: 1,262,186
Total Attempts to Find/Put WORDs into B-trees order 3: 365,283

[Variant (HashInBITS 26 - 10) with 512KB hash:]

Leprechaun_quadrupleton (Fast Greedy Phrase-Ripper), rev. 14+, written by Svalqyatchx.
Purpose: Rips all distinct 4-grams (4-word phrases) with length 12..51 chars from incoming texts.
Feature1: In this revision 512MB 1-way hash is used which results in 67,108,864 external B-Trees of order 3.
Feature2: The bottleneck is seek-time, if the external memory has latency 100+microseconds then look further.
Size of input file with files for Leprechauning: 19
Allocating HASH memory 524,353 bytes ... OK
Allocating/ZEROing 1,292,478,478 bytes swap file ... OK
Size of Input TEXTual file: 206,908,949
|; 0,014,331P/s; Phrase count: 18,760,213 of them 10,165,640 distinct; Done: 64/64
Bytes per second performance: 158,066B/s
Phrases per second performance: 14,331P/s
Time for putting phrases into trees: 1309 second(s)
Flushing UNsorted phrases: 100%; Shaking trees performance: 0,019,739P/s
Time for shaking phrases from trees: 515 second(s)
	Dump LEAFwise also [
	Bytes per second performance: 158,429B/s
	Phrases per second performance: 14,364P/s
	Time for putting phrases into trees: 1306 second(s)
	Flushing UNsorted phrases: 100%; Shaking trees performance: 0,041,492P/s
	Time for shaking phrases from trees: 245 second(s)
	Dump LEAFwise also ]
	Dump & Insert LEAFwise also [
	Bytes per second performance: 174,459B/s
	Phrases per second performance: 15,818P/s
	Time for putting phrases into trees: 1186 second(s)
	Flushing UNsorted phrases: 100%; Shaking trees performance: 0,041,323P/s
	Time for shaking phrases from trees: 246 second(s)
	Dump & Insert LEAFwise also ]
Leprechaun: Done.

Leprechaun report:
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 10,100,104
Number Of Trees(GREATER THE BETTER): 65,536
Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: 7,522,788
Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): 6
Used value for third parameter in KB: 1,262,186
Use next time as third parameter: 1,007,825
Total Attempts to Find/Put WORDs into B-trees order 3: 84,868,241

[Variant (HashInBITS 26 - 20) with 512 hash:]

Leprechaun_quadrupleton (Fast Greedy Phrase-Ripper), rev. 14+, written by Svalqyatchx.
Purpose: Rips all distinct 4-grams (4-word phrases) with length 12..51 chars from incoming texts.
Feature1: In this revision 512MB 1-way hash is used which results in 67,108,864 external B-Trees of order 3.
Feature2: The bottleneck is seek-time, if the external memory has latency 100+microseconds then look further.
Size of input file with files for Leprechauning: 19
Allocating HASH memory 577 bytes ... OK
Allocating/ZEROing 1,292,478,478 bytes swap file ... OK
Size of Input TEXTual file: 206,908,949
|; 0,007,717P/s; Phrase count: 18,760,213 of them 10,165,640 distinct; Done: 64/64
Bytes per second performance: 85,112B/s
Phrases per second performance: 7,717P/s
Time for putting phrases into trees: 2431 second(s)
Flushing UNsorted phrases: 100%; Shaking trees performance: 0,019,777P/s
Time for shaking phrases from trees: 514 second(s)
Leprechaun: Done.

Leprechaun report:
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 10,165,576
Number Of Trees(GREATER THE BETTER): 64
Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: 7,592,585
Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): 14
Used value for third parameter in KB: 1,262,186
Use next time as third parameter: 1,008,399
Total Attempts to Find/Put WORDs into B-trees order 3: 271,393,689

r.14++ physical memory test [Variant (HashInBITS 26 - 0) with 512MB hash:]:
D:\_KAZE_new-stuff\Leprechaun_quadrupleton_r14++_64bit_Physical-n-Virtual>OSHO-TEST_INTERNAL.BAT
Leprechaun_quadrupleton (Fast-In-Future Greedy Phrase-Ripper), rev. 14++, written by Svalqyatchx.
Purpose: Rips all distinct 4-grams (4-word phrases) with length 12..51 chars from incoming texts.
Feature1: In this revision 512MB 1-way hash is used which results in 67,108,864 external B-Trees of order 3.
Feature2: If the external memory has latency 99+microseconds then !(look no further), IOPS(seek-time) rules.
Size of input file with files for Leprechauning: 19
Allocating HASH memory 536,870,977 bytes ... OK
Allocating memory 1233MB ... OK
Size of Input TEXTual file: 206,908,949
|; 1,042,234P/s; Phrase count: 18,760,213 of them 10,165,640 distinct; Done: 64/64
Bytes per second performance: 11,494,941B/s
Phrases per second performance: 1,042,234P/s
Time for putting phrases into trees: 18 second(s)
Flushing UNsorted phrases: 100%; Shaking trees performance: 0,597,978P/s
Time for shaking phrases from trees: 17 second(s)
Leprechaun: Done.

D:\_KAZE_new-stuff\Leprechaun_quadrupleton_r14++_64bit_Physical-n-Virtual>type Leprechaun.LOG
Leprechaun report:
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 731,746
Number Of Trees(GREATER THE BETTER): 9,433,894
Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: 69,623
Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): 1
Used value for third parameter in KB: 1,262,186
Use next time as third parameter: 1,262,186
Total Attempts to Find/Put WORDs into B-trees order 3: 365,283

D:\_KAZE_new-stuff\Leprechaun_quadrupleton_r14++_64bit_Physical-n-Virtual>
*/

// To do #1: Put this 31 in MAXwl: 'int MAXwl = 31;'
// To do #2: No need of flushing unsorted words to file: make backup[] array
//           instead of writing. And mostly sort 26 times!     
// HEAVY BUG in r.7: unsigned long Hill(unsigned long n)
//                   is NOT identical with
//                   unsigned long GRMBLhill[32]; // 00 not used, only 01..31
//                   BECAUSE DUMBEST DUMB Array GRMBLhill expects 'int' not
//                   'unsigned long' !!!

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h> // malloc/free/exit used by readlines()
#include <time.h>
#if defined(_WIN32_ENVIRONMENT_)
#include <io.h> // needed for Windows' 'lseeki64' and 'telli64'
//Above line must be commented in order to compile with Intel C compiler: an error "can't find io.h" occurs.
#else
#endif /* defined(_WIN32_ENVIRONMENT_)  */

// Byte type and byte-string handle used by the sorting and line-loading code of this file.
typedef unsigned char char_t;
typedef char_t *string;

	// File-scope variables: two clock() readings and an int whose users are not in the
	// nearby code. NOTE(review): presumably timing/progress state - confirm against callers.
	clock_t clocks1, clocks2;
	int Bozan;

// Integer aliases consumed by the FNV macros and hash functions.
// NOTE(review): 'unsigned long' is 32 bits wide on Windows but 64 bits on LP64 systems, so
// u_int32_t is presumably a true 32-bit type only on the former - confirm before a POSIX build.
typedef unsigned char u_int8_t; //FNV only
typedef unsigned long u_int32_t; //FNV only
typedef unsigned long long u_int64_t; //FNV only

// SINHA fragment[

// Exchanges *a and *b through a variable named 't' that must exist in the caller's scope.
#define swapKAZE(a, b) { t = *(a); *(a) = *(b); *(b) = t; }

// Insertion sort of the n string pointers in a[], comparing the strings from
// character offset d onwards (bytes compared as unsigned values, 0 terminates).
static void InsertSortKAZE(string *a, int n, int d)
{
    int i;

    for (i = 1; i < n; i++) {
        string *slot = a + i;

        // Sink the element at 'slot' towards the front until its left neighbour is not greater.
        while (slot > a) {
            string lhs = slot[-1] + d;
            string rhs = slot[0] + d;
            string held;

            // Skip the common prefix; stops at the first difference or at the terminator.
            while (*lhs == *rhs && *lhs != 0) {
                lhs++;
                rhs++;
            }
            if (*lhs <= *rhs)
                break;

            held = slot[0];
            slot[0] = slot[-1];
            slot[-1] = held;
            slot--;
        }
    }
}

//int cmpit(unsigned char **h1, unsigned char **h2)
//{
//    return( scmp(*h1, *h2) );
//}

//int scmp( unsigned char *s1, unsigned char *s2 )
//{
//    while( *s1 != '\0' && *s1 == *s2 )
//    {
//        s1++;
//        s2++;
//    }
//    return( *s1-*s2 );
//}

//static void simplesort(string a[], int n, int b)
//{
//   int i, j;
//   string tmp;
//
//   for (i = 1; i < n; i++)
//      for (j = i; j > 0 && scmp(a[j-1]+b, a[j]+b) > 0; j--)
//         { tmp = a[j]; a[j] = a[j-1]; a[j-1] = tmp; }
//}

// SINHA fragment]

// mkqsort.c BEGIN **********************************************************
/*
   Multikey quicksort, a radix sort algorithm for arrays of character
   strings by Bentley and Sedgewick.

   J. Bentley and R. Sedgewick. Fast algorithms for sorting and
   searching strings. In Proceedings of 8th Annual ACM-SIAM Symposium
   on Discrete Algorithms, 1997.

   http://www.CS.Princeton.EDU/~rs/strings/index.html

   The code presented in this file has been tested with care but is
   not guaranteed for any purpose. The writer does not offer any
   warranties nor does he accept any liabilities with respect to
   the code.

   Ranjan Sinha, 1 jan 2003.

   School of Computer Science and Information Technology,
   RMIT University, Melbourne, Australia
   rsinha@cs.rmit.edu.au

*/

//#include "sortstring.h"

/* MULTIKEY QUICKSORT */

// Two-argument minimum, defined only when no 'min' macro exists yet.
// The selected argument is expanded twice, so arguments must be free of side effects.
#ifndef min
#define min(a, b) ((a)<=(b) ? (a) : (b))
#endif


// ------------------------------- BTREE [
// WARNING: these are NOT the usual C truth values: 'false' expands to -1 (which tests as
// true in an 'if') and 'true' expands to 0 (which tests as false). Compare against the
// macros explicitly; never use them directly as conditions.
#define false -1
#define true 0

// Binary tree node: an int payload plus left and right child links.
struct nodeBTREE {
int data;
struct nodeBTREE* left;
struct nodeBTREE* right;
};

// ------------------------------- BTREE ]


/* ssort2 -- Faster Version of Multikey Quicksort */

/* Exchanges the first n string pointers of a[] with the first n of b[],
   element by element in ascending order; does nothing when n <= 0. */
void vecswap2(unsigned char **a, unsigned char **b, int n)
{
    int k;

    for (k = 0; k < n; k++) {
        unsigned char *held = a[k];
        a[k] = b[k];
        b[k] = held;
    }
}

/* swap2 exchanges *a and *b through a variable 't' supplied by the enclosing scope.
   ptr2char yields the character at offset 'depth' (also taken from the enclosing scope)
   of the string that i points to. */
#define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; }
#define ptr2char(i) (*(*(i) + depth))

/* Returns whichever of a, b, c refers to the string whose character at offset
   'depth' is the median of the three. Ties: a when a and b agree, otherwise c
   when c agrees with either of them. */
unsigned char **med3func(unsigned char **a, unsigned char **b, unsigned char **c, int depth)
{
    int ka = ptr2char(a);
    int kb = ptr2char(b);
    int kc;

    if (ka == kb)
        return a;

    kc = ptr2char(c);
    if (kc == ka || kc == kb)
        return c;

    if (ka < kb) {
        if (kb < kc)
            return b;               /* ka < kb < kc */
        return (ka < kc) ? c : a;   /* kb is the largest */
    }

    if (kb > kc)
        return b;                   /* ka > kb > kc */
    return (ka < kc) ? a : c;       /* kb is the smallest */
}
/* Shorthand that picks up 'depth' from the caller's scope. */
#define med3(a, b, c) med3func(a, b, c, depth)

/* Insertion sort of the n string pointers in a[]; strings are compared from
   character offset d onwards, byte by byte as unsigned values. */
void inssort(unsigned char **a, int n, int d)
{
    int i;

    for (i = 1; i < n; i++) {
        unsigned char **slot = a + i;

        /* Move the element left while its left neighbour compares greater. */
        while (slot > a) {
            unsigned char *lhs = slot[-1] + d;
            unsigned char *rhs = slot[0] + d;
            unsigned char *held;

            /* Inline strcmp: advance past the common prefix. */
            while (*lhs == *rhs && *lhs != 0) {
                lhs++;
                rhs++;
            }
            if (*lhs <= *rhs)
                break;

            held = slot[0];
            slot[0] = slot[-1];
            slot[-1] = held;
            slot--;
        }
    }
}

/* Multikey quicksort of the n string pointers in a[], where all strings are treated
   as already equal on their first 'depth' characters. Partitions on the character at
   offset 'depth' into less / equal / greater groups and recurses on each; the equal
   group is sorted on the next character. */
void mkqsort(unsigned char **a, int n, int depth)
{   int d, r, partval;
    unsigned char **pa, **pb, **pc, **pd, **pl, **pm, **pn, *t;
    /* Small inputs are handed to insertion sort. */
    if (n < 20) {
        inssort(a, n, depth);
        return;
    }
    /* Pivot selection: median of first, middle and last element... */
    pl = a;
    pm = a + (n/2);
    pn = a + (n-1);
    if (n > 30) { /* On big arrays, pseudomedian of 9 */
        d = (n/8);
        pl = med3(pl, pl+d, pl+2*d);
        pm = med3(pm-d, pm, pm+d);
        pn = med3(pn-2*d, pn-d, pn);
    }
    pm = med3(pl, pm, pn);
    /* Park the pivot at a[0]; partval is its character at the current depth. */
    swap2(a, pm);
    partval = ptr2char(a);
    /* pa/pd bound the runs of pivot-equal elements collected at the two ends,
       pb/pc are the scanning cursors moving towards each other. */
    pa = pb = a + 1;
    pc = pd = a + n-1;
    for (;;) {
        /* Scan from the left over elements <= pivot; equal ones are swapped to the left end. */
        while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) {
            if (r == 0) { swap2(pa, pb); pa++; }
            pb++;
        }
        /* Scan from the right over elements >= pivot; equal ones are swapped to the right end. */
        while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) {
            if (r == 0) { swap2(pc, pd); pd--; }
            pc--;
       }
        if (pb > pc) break;
        /* *pb is greater and *pc is smaller than the pivot: exchange them and continue. */
        swap2(pb, pc);
        pb++;
        pc--;
    }
    /* Move the pivot-equal runs from both ends into the middle of the array. */
    pn = a + n;
    r = min(pa-a, pb-pa);    vecswap2(a,  pb-r, r);
    r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r);
    /* Less-than group: same depth. */
    if ((r = pb-pa) > 1)
        mkqsort(a, r, depth);
    /* Equal group: advance one character, unless the shared character is the terminator. */
    if (ptr2char(a + r) != 0)
        mkqsort(a + r, pa-a + pn-pd-1, depth+1);
    /* Greater-than group: same depth. */
    if ((r = pd-pc) > 1)
        mkqsort(a + n-r, r, depth);
}

/* Entry point of the multikey quicksort: sorts the n string pointers in a[],
   starting the comparison at character offset 0. */
void mkqsort_main(unsigned char **a, int n)
{
    mkqsort(a, n, 0);
}
// mkqsort.c END ************************************************************

// Why Sinha uses int instead of long??!!
// Walks the lines of text[0..size). A line ends at '\n' or at the end of the buffer.
// With out == NULL the buffer is left untouched and the lines are only counted.
// With out != NULL every line is NUL-terminated in place (a '\r' directly before the
// '\n' is removed as well, so both DOS and Unix files work) and its start is stored in out[].
// The buffer must own one extra byte at text[size], needed to terminate a last line
// that has no trailing '\n'. Returns the number of lines.
static int readlines_split(string text, size_t size, string *out)
{
   int count = 0;
   string end = text + size;
   string cur = text;
   string eol;

   while (cur < end) {
      eol = cur;
      while ((eol < end) && (*eol != '\n')) {eol++;}
      if (out != NULL) {
         *eol = '\0';                                        // overwrites '\n' (or the spare byte)
         if ((eol > cur) && (eol[-1] == '\r')) eol[-1] = '\0'; // DOS line ending: 13,10
         out[count] = cur;
      }
      count++;
      cur = eol + 1;                                         // at most one past the spare byte
   }
   return count;
}

// Loads the whole file 'file_name' into one heap buffer and splits it into lines.
// On success *lines receives a malloc'ed array of pointers to the NUL-terminated lines
// (all pointing into that single buffer, which therefore stays allocated) and the
// number of lines is returned; an empty file yields 0.
// Returns -1 when memory cannot be allocated. The program is terminated with exit(-1)
// when the file cannot be opened or read.
// Differences to the previous version: line ends are no longer assumed to be "\r\n"
// (the byte before '\n' was erased blindly, and the last character of an unterminated
// final line was lost), and a failed pointer-array allocation returns -1 instead of 1.
// Why Sinha uses int instead of long: the line count is still an int, so files with
// more than INT_MAX lines are not supported.
static int readlines(char *file_name, string **lines)
{
   int nlines;
   long file_len;
   size_t size;
   FILE *in_file;
   string basep;
   string *ASbackup;

   if (!(in_file = fopen(file_name, "rb"))) {
      printf( "Leprechaun: Can't open file %s \n", file_name ); 
      exit(-1);
   }
   // ftell() reports failure with a negative value; do not let it become a huge size_t.
   if (fseek(in_file, 0, SEEK_END) != 0 ||
       (file_len = ftell(in_file)) < 0 ||
       fseek(in_file, 0, SEEK_SET) != 0) {
      printf( "Leprechaun: Can't read file %s \n", file_name ); 
      exit(-1);
   }
   size = (size_t)file_len;

   // One spare byte so that a last line without '\n' can be terminated.
   if (!(basep = (string) malloc((size+1)*sizeof(char_t)))) {
      fclose(in_file);
      return -1;
   }
   printf( "Allocated memory for words in MB: %lu\n", (unsigned long)(((size*sizeof(char_t))>>20)+1) );
   if (fread(basep, 1, size, in_file) < size) {
      printf( "Leprechaun: Can't read file %s \n", file_name ); 
      exit(-1);
   }
   fclose(in_file);

// GET nlines:
   nlines = readlines_split(basep, size, NULL);

   // At least one slot is requested, because malloc(0) may legitimately return NULL.
   ASbackup = (string *)malloc( (nlines > 0 ? (size_t)nlines : 1)*sizeof(string) );
   if( ASbackup == NULL )
   { puts( "Leprechaun: Needed memory allocation denied!\n" ); free(basep); return( -1 ); }
   printf( "Allocated memory for pointers-to-words in MB: %lu\n", (unsigned long)(((nlines*sizeof(string))>>20)+1) );
   *lines = ASbackup;

// Upload nlines times:
   return readlines_split(basep, size, ASbackup);
}

/* Conversion helper: writes 'val' in base 'radix' into buf as a NUL-terminated string.
   Digits above 9 are written as lowercase letters starting at 'a'. When is_neg is
   non-zero a leading '-' is written and the value is negated first.
   buf must be large enough for the sign, all digits and the terminator. */
void x64toaKAZE (
        unsigned long long val,
        char *buf,
        unsigned radix,
        int is_neg
        )
{
        char scratch[64];       /* digits, least significant first (64 suffices for radix 2) */
        int count = 0;          /* number of digits collected in scratch */
        char *out = buf;        /* next position to write in the caller's buffer */

        if ( is_neg )
        {
            *out++ = '-';       /* negative, so output '-' and negate */
            val = (unsigned long long)(-(long long)val);
        }

        /* Peel off digits from the low end; a value of 0 still yields one digit. */
        do {
            unsigned digit = (unsigned) (val % radix);
            val /= radix;
            scratch[count++] = (char) (digit > 9 ? digit - 10 + 'a' : digit + '0');
        } while (val > 0);

        /* Emit the collected digits most significant first. */
        while (count > 0)
            *out++ = scratch[--count];

        *out = '\0';
}

/* Actual functions just call conversion helper with neg flag set correctly,
   and return pointer to buffer. */

/* Signed 64-bit to text: formats 'val' in base 'radix' into buf and returns buf.
   A minus sign is produced only for negative values in base 10; in any other base
   the two's-complement bit pattern is printed as an unsigned number. */
char * _i64toaKAZE (
        long long val,
        char *buf,
        int radix
        )
{
        int show_sign = 0;

        if (radix == 10 && val < 0)
            show_sign = 1;

        x64toaKAZE((unsigned long long)val, buf, radix, show_sign);
        return buf;
}

/* Unsigned 64-bit to text: formats 'val' in base 'radix' into buf (never with a sign)
   and returns buf. */
char * _ui64toaKAZE (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        const int never_negative = 0;

        x64toaKAZE(val, buf, radix, never_negative);
        return buf;
}

/* Formats 'val' in base 'radix' as a fixed-width, zero-padded field with a comma after
   every third digit counted from the right: exactly 26 characters holding 20 digits and
   6 commas ("dd,ddd,ddd,ddd,ddd,ddd,ddd"), followed by the terminator at buf[26].
   buf must provide at least 27 bytes. Returns buf.
   Digits above 9 are written as lowercase letters. Values that need more than 20 digits
   (only possible for radix < 10) keep their 20 least significant digits.
   The digits are produced directly at their final position, from the right. The previous
   version first printed the number at the start of buf and then re-read it while writing
   the field into the same buffer, which mixed commas into the digits as soon as the
   number was longer than 20 characters, and stepped a pointer to before buf. */
char * _ui64toaKAZEzerocomma (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        unsigned base = (unsigned) radix;
        unsigned digit;
        int pos;

        buf[26] = 0;
        for (pos = 25; pos >= 0; pos--)
        { if ((25 - pos) % 4 == 3)
          { buf[pos] = (char) (',');     /* every fourth cell from the right is a separator */
            continue;
          }
          digit = (unsigned) (val % base);
          val /= base;                   /* once val is exhausted this keeps yielding '0' */
          if (digit > 9)
            buf[pos] = (char) (digit - 10 + 'a');
          else
            buf[pos] = (char) (digit + '0');
        }
        return buf;
}

/* Formats 'val' in base 'radix' with a comma after every third digit counted from the
   right, without padding. The text is built right-aligned inside buf[0..25] with the
   terminator at buf[26]; the return value points at its first character, somewhere
   inside buf. buf must provide at least 27 bytes.
   Digits above 9 are written as lowercase letters. The field holds at most 20 digits
   and 6 commas; more significant digits (only possible for radix < 10) are dropped.
   The previous version kept writing in front of buf[0] in that case, and it re-read
   the number from the same buffer it was writing the result into. */
char * _ui64toaKAZEcomma (
        unsigned long long val,
        char *buf,
        int radix
        )
{
        unsigned base = (unsigned) radix;
        unsigned digit;
        int pos = 26;           /* index of the leftmost character written so far */
        int emitted = 0;        /* number of digits written so far */

        buf[26] = 0;
        do
        { if (emitted != 0 && emitted % 3 == 0)
          { if (pos < 2) break; /* no room left for a comma plus a digit */
            buf[--pos] = (char) (',');
          }
          if (pos == 0) break;  /* field is full */
          digit = (unsigned) (val % base);
          val /= base;
          if (digit > 9)
            buf[--pos] = (char) (digit - 10 + 'a');
          else
            buf[--pos] = (char) (digit + '0');
          emitted++;
        } while (val > 0);
        return buf + pos;
}

// 8-bit hash: XOR of all characters of the NUL-terminated string 'str'.
// The caller's pointer is not modified; an empty string hashes to 0.
unsigned char KuxHash(char *str)
{
  unsigned char acc = 0;
  char *cursor;

  for (cursor = str; *cursor; cursor++)
    acc = acc ^ *cursor;

  return acc; // 00..255 i.e. 2^8=256
}

// Hash built from two ingredients: 'h', the XOR of all characters, and 'h2', the sum of
// every character multiplied by its zero-based position. The result is (h<<4) ORed with
// h2 modulo 15.
// The previous statement 'h2 = h2 + max31 * str[max31++];' read and modified max31
// without sequencing, which is undefined behaviour in C. The index is now advanced in a
// separate step.
// NOTE(review): the weight is assumed to be the position before the increment (what a
// compiler that applies the post-increment last produces) - confirm against the original build.
int KuxHash2(char *str) 
{ int h = 0;
  unsigned long h2 = 0; // must be long: 31*'z'=31*122
  int i;
  for (i = 0; str[i]; i++)
  { h = h ^ str[i]; // 00..255 i.e. 2^8=256
    h2 = h2 + i * str[i];
  }
  h=h<<4; // make room for the 4 low bits filled below
  h = h|( h2%((1<<4)-1) ); // h2 modulo 15 gives 00..14
  return h; // 00..4095 i.e. 2^12=4096
}

//OSHO test - Attempts to Find/Put a WORD into linked list count: 32,011,937
// Hash built from 'h', the XOR of all characters, and 'h2', the sum of every character
// multiplied by its one-based position. The result is (h<<6) ORed with h2 modulo 63.
// The previous statement 'h2 = h2 + str[max31++] * (max31+1);' read and modified max31
// without sequencing, which is undefined behaviour in C. The weight is now spelled out as
// position+1, the value the compiler listing kept in this file for the identical
// statement of KuxHash3plus uses.
int KuxHash3(char *str) 
{ int h = 0;
  unsigned long h2 = 0; // must be long: 31*'z'=31*122
  int i;
  for (i = 0; str[i]; i++)
  { h = h ^ str[i]; // 00..255 i.e. 2^8=256
    h2 = h2 + str[i] * (i+1);
  }
// Result is: 7bits in 'h' and 32bits in 'h2'.

  h=h<<6; // 7bits of 'h' moved above the 6 low bits: 13bits in total
  h = h|( h2%((1<<6)-1) ); // 64-1=63=9*7; 61 is prime
  return h; // 00..8191 i.e. 2^13=8192
}

//OSHO test - Attempts to Find/Put a WORD into a linked list count: 31,927,285
// 13-bit hash built from 'h', the XOR of all characters, and 'h2', the sum of every
// character multiplied by its one-based position. The result is
// ((h<<8) | (h2 % 251)) masked to the low 13 bits.
// The previous statement 'h2 = h2 + str[max31++] * (max31+1);' read and modified max31
// without sequencing, which is undefined behaviour in C. The weight is now spelled out as
// position+1, which is what the compiler listing kept below this function computes
// (lea ebx,[edi+ecx] with edi = 1-str, then imul with the character).
int KuxHash3plus(char *str)
{ int h = 0;
  unsigned long h2 = 0; // must be long: 31*'z'=31*122
  int i;
  for (i = 0; str[i]; i++)
  { h = h ^ str[i]; // 00..255 i.e. 2^8=256
    h2 = h2 + str[i] * (i+1);
  }
// Result is: 7bits in 'h' and 32bits in 'h2'.

// a in ASCII is 097 = 0110 0001
// z in ASCII is 122 = 0111 1010
// Above two lines show that bits 8-7-6 are always 0-1-1, so only the low 5 bits of 'h'
// carry information; the mask below keeps exactly those 5 bits above the 8 bits of h2%251.
  h = (( h<<8 )|( h2%(251) ))&8191; // 251 prime
  return h; // 00..8191 i.e. 2^13=8192
}

/*
PUBLIC	_KuxHash3plus
; Function compile flags: /Ogty
_TEXT	SEGMENT
_str$ = 8						; size = 4
_KuxHash3plus PROC NEAR
; Line 511
	mov	ecx, DWORD PTR _str$[esp-4]
	mov	dl, BYTE PTR [ecx]
	push	esi
	xor	esi, esi
	xor	eax, eax
	test	dl, dl
	je	SHORT $L1561
	push	ebx
	push	edi
	mov	edi, 1
	sub	edi, ecx
	npad	8
$L1560:
; Line 512
	movsx	edx, BYTE PTR [ecx]
; Line 514
	lea	ebx, DWORD PTR [edi+ecx]
	imul	ebx, edx
	xor	esi, edx
	mov	dl, BYTE PTR [ecx+1]
	add	eax, ebx
	inc	ecx
	test	dl, dl
	jne	SHORT $L1560
	pop	edi
	pop	ebx
$L1561:
; Line 527
	xor	edx, edx
	mov	ecx, 251				; 000000fbH
	div	ecx
	shl	esi, 8
	mov	eax, edx
; Line 529
	or	eax, esi
	and	eax, 8191				; 00001fffH
	pop	esi
; Line 530
	ret	0
_KuxHash3plus ENDP
_TEXT	ENDS
*/

//OSHO test - Attempts to Find/Put a WORD into a linked list count: 32,021,975
// Polynomial hash: for every character the running value is shifted left by 7 bits,
// the character is added, and the sum is reduced modulo 8191.
int KuxHash4(char *str)
{
  int acc = 0;
  int i = 0;

  while (str[i] != 0) {
    acc = ((acc<<7) + str[i]) % (8192-1); // 2^13-1 = 8191 is Mersenne prime
    i++;
  }

  return acc; // 00..8190
}

/*
int hash(char *v, int M)
  { int h = 0, a = 127;
    for (; *v != 0; v++) 
      h = (a*h + *v) % M;
    return h;
  }

int hashU(char *v, int M)
  { int h, a = 31415, b = 27183;
    for (h = 0; *v != 0; v++, a = a*b % (M-1)) 
        h = (a*h + *v) % M;
    return (h < 0) ? (h + M) : h;
  }
*/

// Kaze: My appreciation of FNV is far beyond C code optimization, it is alchemical, and why not, magical.

/*
FNV hash history
    The basis of the FNV hash algorithm was taken from an idea sent as reviewer comments to the IEEE POSIX P1003.2 committee 
by Glenn Fowler and Phong Vo back in 1991. In a subsequent ballot round: Landon Curt Noll improved on their algorithm. 
Some people tried this hash and found that it worked rather well. In an EMail message to Landon, they named it 
the ``Fowler/Noll/Vo'' or FNV hash.
    FNV hashes are designed to be fast while maintaining a low collision rate. The FNV speed allows one to quickly hash 
lots of data while maintaining a reasonable collision rate. The high dispersion of the FNV hashes makes them well suited 
for hashing nearly identical strings such as URLs, hostnames, filenames, text, IP addresses, etc. 
*/

/* NOTE: u_int64_t is a 64 bit unsigned type */
/* NOTE: u_int32_t is a 32 bit unsigned type */
/* NOTE: u_int16_t is a 16 bit unsigned type */
/* NOTE: u_int8_t is a 8 bit unsigned type */

//typedef unsigned char u_int8_t; //FNV only
//typedef unsigned long u_int32_t; //FNV only
//typedef unsigned long long u_int64_t; //FNV only

// 32 bit FNV_prime = 2^24 + 2^8 + 0x93 = 16777619
// 64 bit FNV_prime = 2^40 + 2^8 + 0xb3 = 1099511628211

// 32 bit offset_basis = 2166136261
// 64 bit offset_basis = 14695981039346656037

// FNV offset bases and multipliers. The constants carry explicit unsigned suffixes:
// 14695981039346656037 does not fit any signed 64-bit type and 2166136261 does not fit
// a signed 32-bit type, so the unsuffixed forms relied on compiler-specific handling.
// FNV1_32_PRIME is not the standard 32-bit FNV prime 16777619 (that one is used by
// FNV_32A_OP_GENERIC); 602173697 is one of the candidates from the dispersion list below.
#define FNV1_64_INIT ((u_int64_t)14695981039346656037ULL)
#define FNV1_64_PRIME ((u_int64_t)1099511628211ULL)
#define FNV1_32_INIT ((u_int32_t)2166136261UL)
#define FNV1_32_PRIME ((u_int32_t)602173697UL)
// FNV1A_Hash_4_OCTETS gives dispersion as follows:
//3549448: 1607
//3549669: 171072511
//3550710: 272961023
//3550733: 172783361
//3550734: 431562497
//3550929: 204312319
//3550984: 562853633
//3550991: 551362303
//3551359: 332820737
//3551484: 354126079
//3551514: 407138561
//3551523: 442058753
//3551701: 449230849
//3551736: 107712257
//3551961: 428904191
//3552039: 602173697
//3552103: 588411137

// One FNV-1a style step each: XOR the input into the running hash, then multiply.
// The variants differ in how wide the input is taken and in how the multiplication is done.

// 64-bit hash, input narrowed to one byte, multiplied by FNV1_64_PRIME.
#define FNV_64A_OP(hash, octet) \
    (((u_int64_t)(hash) ^ (u_int8_t)(octet)) * FNV1_64_PRIME)

// 64-bit hash, input taken as a whole u_int64_t, multiplied by FNV1_64_PRIME.
#define FNV_64A_OP64(hash, octet) \
    (((u_int64_t)(hash) ^ (u_int64_t)(octet)) * FNV1_64_PRIME)

// u_int32_t hash, one input byte, multiplied by the literal 16777619.
#define FNV_32A_OP_GENERIC(hash, octet) \
    (((u_int32_t)(hash) ^ (u_int8_t)(octet)) * 16777619)

// u_int32_t hash, one input byte, multiplied by FNV1_32_PRIME.
#define FNV_32A_OP(hash, octet) \
    (((u_int32_t)(hash) ^ (u_int8_t)(octet)) * FNV1_32_PRIME)

// XOR part only of the byte-wise u_int32_t step (no multiplication).
#define FNV_32A_OP_MULless_core(hash, octet) \
    ( (u_int32_t)(hash) ^ (u_int8_t)(octet) )

// Byte-wise step with the multiplication replaced by (x<<5) - x, i.e. x*31.
// Both arguments are expanded twice, so they must be free of side effects.
#define FNV_32A_OP_MULless(hash, octet) \
    ( (FNV_32A_OP_MULless_core(hash, octet)<<5) - FNV_32A_OP_MULless_core(hash, octet) )

// u_int32_t hash, input taken as a whole u_int32_t, multiplied by FNV1_32_PRIME.
#define FNV_32A_OP32(hash, octet) \
    (((u_int32_t)(hash) ^ (u_int32_t)(octet)) * FNV1_32_PRIME)

// Hash and input widened to u_int64_t before the XOR, multiplied by FNV1_32_PRIME.
#define FNV_32A_OP64(hash, octet) \
    (((u_int64_t)(hash) ^ (u_int64_t)(octet)) * FNV1_32_PRIME)

// XOR part only of the u_int32_t-wide step (no multiplication).
#define FNV_32A_OP32_MULless_core(hash, octet) \
    ( (u_int32_t)(hash) ^ (u_int32_t)(octet) )

// u_int32_t-wide step with the multiplication replaced by (x<<5) - x, i.e. x*31.
// Both arguments are expanded twice, so they must be free of side effects.
#define FNV_32A_OP32_MULless(hash, octet) \
    ( (FNV_32A_OP32_MULless_core(hash, octet)<<5) - FNV_32A_OP32_MULless_core(hash, octet) )


// Invoking: FNV1A_Hash_4_OCTETS_31(wrd, wrdlen>>2) // = 0,1,2,3,4,5,6,7 [1..31]
// 13-bit hash of the NUL-terminated string 'str' using the multiplication-free (x*31)
// steps. The first wrdlen_QUADRUPLETS chunks are consumed through a 'long' load with a
// stride of 4 bytes each, the remaining characters one at a time up to the terminator.
// The final value is xor-folded (high half onto low half) and masked to 00..8191.
// NOTE(review): the chunk load assumes 'long' is 4 bytes and that unaligned loads are
// acceptable on the target - confirm for non-Windows builds.
int FNV1A_Hash_4_OCTETS_31(char *str, int wrdlen_QUADRUPLETS)
{
    u_int32_t acc = FNV1_32_INIT;
    char *cursor = str;
    int remaining = wrdlen_QUADRUPLETS;

    // Stage #1: whole chunks; its goal is to reduce the number of loop iterations.
    while (remaining != 0) {
        acc = FNV_32A_OP32_MULless(acc, (unsigned long)*(long *)cursor);
        cursor += 4;
        --remaining;
    }

    // Stage #2: leftover characters.
    while (*cursor) {
        acc = FNV_32A_OP_MULless(acc, *cursor);
        ++cursor;
    }

    return ((acc>>16) ^ acc) & 8191; // 00..8191 i.e. 2^13=8192
}


// Invoking: FNV1A_Hash_4_OCTETS(wrd, wrdlen>>2) // = 0,1,2,3,4,5,6,7 [1..31]
// 13-bit hash of the NUL-terminated string 'str' using multiply-by-FNV1_32_PRIME steps.
// The first wrdlen_QUADRUPLETS chunks are consumed through a 'long' load with a stride
// of 4 bytes each (one multiplication per chunk instead of one per character), the
// remaining characters one at a time up to the terminator. The final value is
// xor-folded (high half onto low half) and masked to 00..8191.
// NOTE(review): the chunk load assumes 'long' is 4 bytes and that unaligned loads are
// acceptable on the target - confirm for non-Windows builds.
int FNV1A_Hash_4_OCTETS(char *str, int wrdlen_QUADRUPLETS)
{
    u_int32_t acc = FNV1_32_INIT;
    char *cursor = str;
    int remaining = wrdlen_QUADRUPLETS;

    // Stage #1: whole chunks.
    while (remaining != 0) {
        acc = FNV_32A_OP32(acc, (unsigned long)*(long *)cursor);
        cursor += 4;
        --remaining;
    }

    // Stage #2: leftover characters.
    while (*cursor) {
        acc = FNV_32A_OP(acc, *cursor);
        ++cursor;
    }

    return ((acc>>16) ^ acc) & 8191; // 00..8191 i.e. 2^13=8192
}

/*
Results for 'FNV1A_Hash_8_OCTETS':
Bytes per second performance: 23,110,160B/s
Words per second performance: 1,959,516W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3419429
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 51%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18783551
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '1,119'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 268,085,505
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 7,690,615
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 2,622 must have PEAK = 12 = rounding down of integer (1+lb(2,622))
Binary-Search-Tree(1st out of 1) with MaxNODEs = 2,622 has PEAK = 592 and LEAFs = 689
Binary-Search-Tree(1st out of 1) with MaxPEAK = '1,119' has NODEs = 1,537 and LEAFs = 287
Binary-Search-Tree(1st out of 1) with MaxLEAFs = 731 has NODEs = 2,517 and PEAK = 448
*/
// Invoking: FNV1A_Hash_8_OCTETS(wrd, wrdlen>>3) // = 0,1,2,3 [1..31]
// 13-bit hash of the NUL-terminated string 'str'. The first wrdlen_OCTETS chunks are
// consumed with a stride of 8 bytes each, the remaining characters one at a time up to
// the terminator. The final value is xor-folded (high half onto low half) and masked
// to 00..8191.
// NOTE(review): each chunk is loaded through a 'long' pointer while the cursor advances
// by 8; where 'long' is 4 bytes only the first half of every chunk seems to enter the
// hash. Changing this would change the hash values, so it is left as is - confirm intent.
int FNV1A_Hash_8_OCTETS(char *str, int wrdlen_OCTETS)
{
    u_int32_t acc = FNV1_32_INIT;
    char *cursor = str;
    int remaining = wrdlen_OCTETS;

    // Stage #1: whole chunks; the 64-bit product is truncated when stored back into acc.
    while (remaining != 0) {
        acc = FNV_32A_OP64(acc, (unsigned long long)*(long *)cursor);
        cursor += 8;
        --remaining;
    }

    // Stage #2: leftover characters.
    while (*cursor) {
        acc = FNV_32A_OP(acc, *cursor);
        ++cursor;
    }

    return ((acc>>16) ^ acc) & 8191; // 00..8191 i.e. 2^13=8192
}


// Invoking: FNV1A_Hash_Granularity(wrd, wrdlen>>0|2|3, 0|2|3) 
// 13-bit hash of the NUL-terminated string 'str' with a selectable chunk width.
//   Granularity == 3: 64-bit hash; wrdlen_granulated chunks of 8 bytes, then single
//                     characters; the 13 most significant bits are folded onto the low bits.
//   Granularity == 2: u_int32_t hash; wrdlen_granulated chunks of 4 bytes, then single characters.
//   any other value : u_int32_t hash over single characters only (wrdlen_granulated is ignored).
// The goal of the chunked stage is to reduce the number of multiplications and, mainly,
// the number of loop iterations.
int FNV1A_Hash_Granularity(char *str, int wrdlen_granulated, int Granularity) // wrdlen>>0=wrdlen
{
    char *cursor = str;
    int chunks = wrdlen_granulated;
    u_int32_t acc32 = FNV1_32_INIT;
    u_int64_t acc64;

    if (Granularity == 3) {
        acc64 = FNV1_64_INIT;

        // Stage #1: 8 bytes per step.
        while (chunks != 0) {
            acc64 = FNV_64A_OP64(acc64, (u_int64_t)*(u_int64_t *)cursor);
            cursor += 8; // (1<<Granularity): 1<<3=8
            --chunks;
        }
        // Stage #2: leftover characters.
        while (*cursor) {
            acc64 = FNV_64A_OP(acc64, (u_int8_t)*(u_int8_t *)cursor);
            ++cursor;
        }

        // Shift amounts tried and the resulting counts (greater is better, reference 3,549,448):
        //hash64>>16: 3,544,160 just bad
        //hash64>>33: 3,547,854
        //hash64>>34: 3,547,266
        //hash64>>35: 3,547,453
        //hash64>>36: 3,547,242
        //hash64>>40: 3,548,263
        //hash64>>44: 3,548,242
        //hash64>>45: 3,549,056
        //hash64>>46: 3,549,207
        //hash64>>47: 3,549,094
        //hash64>>50: 3,549,392
        //hash64>>51: 3,549,395 i.e. maximum shift: the 13 most significant bits i.e. (64-13)
        // Above results are obtained for following set:
        //if (wrdlen<=19) // 4x4+3=19 i.e. last contains 7 clashes
        //              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>2, 2)<<2; //13+++++
        //else            // 2x8+4=20 i.e. first contains 6 clashes
        //              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>3, 3)<<2; //13+++++
        return ((acc64>>51) ^ acc64) & 8191; // 00..8191 i.e. 2^13=8192
    }

    if (Granularity == 2) {
        // Stage #1: 4 bytes per step.
        while (chunks != 0) {
            acc32 = FNV_32A_OP32(acc32, (u_int32_t)*(u_int32_t *)cursor);
            cursor += 4; // (1<<Granularity): 1<<2=4
            --chunks;
        }
    }

    // Stage #2: leftover characters (the whole string when no chunked stage ran).
    while (*cursor) {
        acc32 = FNV_32A_OP(acc32, (u_int8_t)*(u_int8_t *)cursor);
        ++cursor;
    }

    return ((acc32>>16) ^ acc32) & 8191; // 00..8191 i.e. 2^13=8192
}


// char *string;		/* the string to 64 bit FNV-1a hash */
// u_int64_t hash;		/* will hold the final value of the hash */
// char *p;
// 
// hash = FNV1_64_INIT;
// for (p=string; *p; ++p) {
//     hash = FNV_64A_OP(hash, *p);
// }


// If you need an x-bit hash where x is not a power of 2, 
// then we recommend that you compute the FNV hash that is just larger than x-bits and xor-fold the result down to x-bits.
// By xor-folding we mean shift the excess high order bits down and xor them with the lower x-bits. 
// For tiny x < 16 bit values, we recommend using a 32 bit FNV-1 hash as follows:

//    /* NOTE: for 0 < x < 16 ONLY!!! */
//    #define TINY_MASK(x) (((u_int32_t)1<<(x))-1)
//    #define FNV1_32_INIT ((u_int32_t)2166136261)
//    u_int32_t hash;
//    void *data;
//    size_t data_len;
//
//    hash = fnv_32_buf(data, data_len, FNV1_32_INIT);
//    hash = (((hash>>x) ^ hash) & TINY_MASK(x));


// Byte-wise FNV-1a over a NUL-terminated key, reduced to 13 bits by masking only
// (no shift/xor folding of the upper bits).
// Returns a slot number in the range 0..8191.
int FNV1A_Hash_SHIFTless_XORless(char *str)
{
u_int32_t state = FNV1_32_INIT;
char *cursor = str;

while (*cursor) {
    state = FNV_32A_OP(state, *cursor);
    ++cursor;
}

return state & 8191; // 00..8191 i.e. 2^13=8192
}

/*
_FNV1A_Hash_SHIFTless_XORless PROC NEAR
; Line 721
	mov	edx, DWORD PTR _str$[esp-4]
	mov	cl, BYTE PTR [edx]
	test	cl, cl
	mov	eax, -2128831035			; 811c9dc5H
	je	SHORT $L1582
	npad	1
$L1580:
; Line 722
	movzx	ecx, cl
	xor	ecx, eax
	imul	ecx, 16777619				; 01000193H
	inc	edx
	mov	eax, ecx
	mov	cl, BYTE PTR [edx]
	test	cl, cl
	jne	SHORT $L1580
$L1582:
; Line 726
	and	eax, 8191				; 00001fffH
; Line 727
	ret	0
_FNV1A_Hash_SHIFTless_XORless ENDP
*/


// Byte-wise FNV-1a over a NUL-terminated key, xor-folded by 13 bits and masked
// down to a 13-bit slot number (0..8191).
int FNV1A_Hash(char *str)
{
u_int32_t state = FNV1_32_INIT;
char *cursor = str;

while (*cursor) {
    state = FNV_32A_OP(state, *cursor);
    ++cursor;
}

// (((u_int32_t)1<<(x))-1) where x=13
return ((state>>13) ^ state) & 8191; // 00..8191 i.e. 2^13=8192
}

/*
_FNV1A_Hash PROC NEAR
; Line 722
	mov	edx, DWORD PTR _str$[esp-4]
	mov	al, BYTE PTR [edx]
	test	al, al
	mov	ecx, -2128831035			; 811c9dc5H
	je	SHORT $L1582
	npad	1
$L1580:
; Line 723
	movzx	eax, al
	xor	eax, ecx
	imul	eax, 16777619				; 01000193H
	inc	edx
	mov	ecx, eax
	mov	al, BYTE PTR [edx]
	test	al, al
	jne	SHORT $L1580
$L1582:
; Line 727
	mov	eax, ecx
	shr	eax, 13					; 0000000dH
	xor	eax, ecx
	and	eax, 8191				; 00001fffH
; Line 728
	ret	0
_FNV1A_Hash ENDP
*/

/*
Wayne Diamond implemented 32-bit FNV algorithm in PowerBASIC inline x86 assembly:


    FUNCTION FNV32(BYVAL dwOffset AS DWORD, BYVAL dwLen AS DWORD, BYVAL offset_basis AS DWORD) AS DWORD
    #REGISTER NONE
    ! mov esi, dwOffset      ;esi = ptr to buffer
    ! mov ecx, dwLen         ;ecx = length of buffer (counter)
    ! mov eax, offset_basis  ;set to 2166136261 for FNV-1
    ! mov edi, &h01000193    ;FNV_32_PRIME = 16777619
    ! xor ebx, ebx           ;ebx = 0
    nextbyte:
    ! mul edi                ;eax = eax * FNV_32_PRIME
    ! mov bl, [esi]          ;bl = byte from esi
    ! xor eax, ebx           ;al = al xor bl
    ! inc esi                ;esi = esi + 1 (buffer pos)
    ! dec ecx                ;ecx = ecx - 1 (counter)
    ! jnz nextbyte           ;if ecx is 0, jmp to NextByte
    ! mov FUNCTION, eax      ;else, function = eax
    END FUNCTION

Wayne said:

    ''Just thought I should let you know that I've ported the 32-bit FNV algorithm over to inline assembly. 
It's actually in PowerBASIC (www.powerbasic.com) format - a compiler I use, but the main function is all assembly. 
It could be optimized further in terms of saving a couple of clock cycles, 
but it's fairly optimized already - only 6 instructions in the main loop, plus 5 setup instructions, 
and compiles to just 33 bytes.'' 

M.S.Schulte sent us these 32-bit FNV-1 and FNV-1a x86 assembler implementations (written in flat assembler), 
half of which were optimized for speed, the other half were optimized for size:

    small_fnv32:    ;FNV1 32bit (size: 31 bytes)
    ;                Intel Core 2 Duo E6600: 354.20 mb/s
       push    esi
       push    edi
       mov     esi, [esp + 0ch] ;buffer
       mov     ecx, [esp + 10h] ;length
       mov     eax, [esp + 14h] ;basis
       mov     edi, 01000193h   ;fnv_32_prime
     next:
       mul     edi
       xor     al, [esi]
       inc     esi
       loop    snext
       pop     edi
       pop     esi
       retn    0ch

     small_fnv32a:   ;FNV1a 32bit (size: 31 bytes)
    ;                 Intel Core 2 Duo E6600: 327.68 mb/s
       push    esi
       push    edi
       mov     esi, [esp + 0ch] ;buffer
       mov     ecx, [esp + 10h] ;length
       mov     eax, [esp + 14h] ;basis
       mov     edi, 01000193h   ;fnv_32_prime
     nexta:
       xor     al, [esi]
       mul     edi
       inc     esi
       loop    nexta
       pop     edi
       pop     esi
       retn    0ch

    fast_fnv32:    ;FNV1 32bit (size: 36 bytes)
    ;               Intel Core 2 Duo E6600: 565.12 mb/s
       push    ebx
       push    esi
       push    edi
       mov     esi, [esp + 10h] ;buffer
       mov     ecx, [esp + 14h] ;length
       mov     eax, [esp + 18h] ;basis
       mov     edi, 01000193h   ;fnv_32_prime
       xor     ebx, ebx
     next:
       mul     edi
       mov     bl, [esi]
       xor     eax, ebx
       inc     esi
       dec     ecx
       jnz     next
       pop     edi
       pop     esi
       pop     ebx
       retn    0ch

     fast_fnv32a:   ;FNV1a 32bit (size: 36 bytes)
    ;                Intel Core 2 Duo E6600: 574.95 mb/s
       push    ebx
       push    esi
       push    edi
       mov     esi, [esp + 10h] ;buffer
       mov     ecx, [esp + 14h] ;length
       mov     eax, [esp + 18h] ;basis
       mov     edi, 01000193h   ;fnv_32_prime
       xor     ebx, ebx
     nexta:
       mov     bl, [esi]
       xor     eax, ebx
       mul     edi
       inc     esi
       dec     ecx
       jnz     nexta
       pop     edi
       pop     esi
       pop     ebx
       retn    0ch
*/

//Number Of Trees(GREATER THE BETTER): 3525737
//Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
//Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18677243
// Multiplicative hash (multiplier 17, characters rebased to ' '), two characters per
// loop iteration, xor-folded and masked to 13 bits.
//   key    - key bytes; at least wrdlen of them are read.
//   wrdlen - number of bytes to hash.
// Returns a slot number in the range 0..8191.
// The accumulator is unsigned: the running value overflows 32 bits for all but the
// shortest keys, and overflowing a signed int is undefined behaviour. Unsigned
// arithmetic wraps by definition and yields the same low 13 bits.
int Hash17_unrolled(const char *key, int wrdlen)
{
	unsigned int hash = 1;
        int i;
	for(i = 0; i < (wrdlen & -2); i += 2) {
		hash = 17u * hash + (unsigned int)(key[i] - ' ');
		hash = 17u * hash + (unsigned int)(key[i+1] - ' ');
	}
	// Odd length: one character is left over.
	if(wrdlen & 1)
		hash = 17u * hash + (unsigned int)(key[wrdlen-1] - ' ');
	return (int)(( hash ^ (hash >> 16) ) & 8191u);
}

//hash = 1:
//Number Of Trees(GREATER THE BETTER): 3556516
//Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
//Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18646464
//hash = 13:
//Number Of Trees(GREATER THE BETTER): 3556755
//Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
//Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18646225
//hash = 11:
//Number Of Trees(GREATER THE BETTER): 3557011
//Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
//Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18645969
//hash = 7:
//Number Of Trees(GREATER THE BETTER): 3557181
//Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
//Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18645799
// Multiplicative hash (multiplier 17+9=26, seed 7), two characters per loop
// iteration, xor-folded and masked to 13 bits.
//   key    - key bytes; at least wrdlen of them are read.
//   wrdlen - number of bytes to hash.
// Returns a slot number in the range 0..8191.
// The accumulator is unsigned: the running value overflows 32 bits for longer keys,
// and overflowing a signed int is undefined behaviour. Unsigned arithmetic wraps by
// definition and yields the same low 13 bits.
int Alfalfa(const char *key, int wrdlen)
{
	unsigned int hash = 7;
        int i;
	for(i = 0; i < (wrdlen & -2); i += 2) {
		hash = (17u+9u) * ((17u+9u) * hash + (unsigned int)(key[i])) + (unsigned int)(key[i+1]);
	}
	// Odd length: one character is left over.
	if(wrdlen & 1)
		hash = (17u+9u) * hash + (unsigned int)(key[wrdlen-1]);
	return (int)(( hash ^ (hash >> 16) ) & 8191u);
}

/*
[FNV1A 'shift-less-&-xor-less' hash used in Leprechaun r.13+++:]

int FNV1A_Hash_SHIFTless_XORless(char *str)
{ 
u_int32_t hash;
char *p;

hash = FNV1_32_INIT;
for (p=str; *p; ++p) {
    hash = FNV_32A_OP(hash, *p);
}
//hash = ((hash>>13) ^ hash) & 8191; // (((u_int32_t)1<<(x))-1) where x=13

  return hash & 8191; // 00..8191 i.e. 2^13=8192
}

Words per second performance: 837,458W/s
Input File with a list of TEXTual Files: wikipedia-en-html.tar.wrd.lst
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2772875
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 41%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 9788999

Words per second performance: 1,007,751W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3537061
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18665919

[My '2in1' hash used in Leprechaun r.13++:]

int KuxHash3plus(char *str)
{ int h = 0;
  unsigned long h2 = 0; // must be long: 31*'z'=31*122
  int max31 = 0;
  while (str[max31])
  { h = h ^ str[max31]; // 00..255 i.e. 2^8=256
    //h2 = h2 + str[max31++];       // [113s]
    h2 = h2 + str[max31++] * (max31+1);
  }
// Result is: 7bits in 'h' and 32bits in 'h2'.

                        //printf("%s:\n ",str);
                        //printf("%d ",h);
// a in ASCII is 097 = 0110 0001
// z in ASCII is 122 = 0111 1010
// Above two lines show that bits 8-7-6 are always 0-1-1 so need for low 5 bits.
  //h=h<<8; // 00..15 i.e. 5bits + 00-07bits=13bits
                        //printf("%d ",h);
                        //printf("%d ",h2);
  //h = h|( str[0] ^ str[max31-1] ); // [111s] a..z: each XOR each gives 00..31
  h = (( h<<8 )|( h2%(251) ))&8191; // 251 prime
                        //printf("%d \n",h);
  return h; // 00..8191 i.e. 2^13=8192
}

Words per second performance: 785,117W/s
Input File with a list of TEXTual Files: wikipedia-en-html.tar.wrd.lst
Word count: 12,561,874 of them 12,561,874 distinct
Number Of Trees(GREATER THE BETTER): 2663566
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 40%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 9898308

Words per second performance: 979,758W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Trees(GREATER THE BETTER): 3410463
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 51%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18792517

[Last standing for English(en)-Wikipedia's wordlist:]
chongo's hash is faster(in total, not the function itself) than Kaze's hash by ((837,458W/s - 785,117W/s)/785,117W/s)*100% = 6.6%
chongo's hash has better distribution than Kaze's hash by ((9898308 - 9788999)/9788999)*100% = 1.1%

[Last standing for LATIN(de,en,es,fr,it,nl,pt,ro)-Wikipedia's wordlist:]
chongo's hash is faster(in total, not the function itself) than Kaze's hash by ((1,007,751W/s - 979,758W/s)/979,758W/s)*100% = 2.8%
chongo's hash has better distribution than Kaze's hash by ((18792517 - 18665919)/18665919)*100% = 0.6%

Bottomline is:
Your hash thrash, my hash for trash, he-he.
Thanks a lot, again, Mr. Noll.

Yummy little file: http://www.isthe.com/chongo/src/fnv/fnv-5.0.2.tar.gz
*/

/*
// Paul Larson (http://research.microsoft.com/~PALARSON/)
UINT HashLarson(const CHAR *key, SIZE_T len) {
	UINT hash = 0;
	for(UINT i = 0; i < len; ++i)
		hash = 101 * hash + key[i];
	return hash ^ (hash >> 16);
}

// Kernighan & Ritchie, "The C programming Language", 3rd edition.
UINT HashKernighanRitchie(const CHAR *key, SIZE_T len) {
	UINT hash = 0;
	for(UINT i = 0; i < len; ++i)
		hash = 31 * hash + key[i];
	return hash;
}

// A hash function with multiplier 65599 (from Red Dragon book)
UINT Hash65599(const CHAR *key, SIZE_T len) {
	UINT hash = 0;
	for(UINT i = 0; i < len; ++i)
		hash = 65599 * hash + key[i];
	return hash ^ (hash >> 16);
}

// FNV hash, http://isthe.com/chongo/tech/comp/fnv/
UINT HashFNV1a(const CHAR *key, SIZE_T len) {
	UINT hash = 2166136261;
	for(UINT i = 0; i < len; ++i)
		hash = 16777619 * (hash ^ key[i]);
	return hash ^ (hash >> 16);
}

// Ramakrishna hash
UINT HashRamakrishna(const CHAR *key, SIZE_T len) {
	UINT h = 0;
	for(UINT i = 0; i < len; ++i) {
		h ^= (h << 5) + (h >> 2) + key[i];
	}
	return h;
}
*/

/*
Results for 'Hash_Alfalfa':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3549079
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18653901
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '37'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 117,063,824
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,072,279
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 84 must have PEAK = 7 = rounding down of integer (1+lb(84))
Binary-Search-Tree(1st out of 2) with MaxNODEs = 84 has PEAK = 20 and LEAFs = 24
Binary-Search-Tree(1st out of 3) with MaxPEAK = '37' has NODEs = 67 and LEAFs = 17
Binary-Search-Tree(1st out of 1) with MaxLEAFs = 28 has NODEs = 78 and PEAK = 22
*/
// Multiplicative hash (multiplier 53, seed 7) taking two characters per step,
// xor-folded and masked to a 13-bit slot number (0..8191).
//   key    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
UINT Hash_Alfalfa(const char *key, unsigned int wrdlen)
{
	UINT acc = 7;
	const char *cur = key;
	unsigned int pairs = wrdlen >> 1;

	while (pairs--) {
		acc = (53) * ((53) * acc + (cur[0])) + (cur[1]);
		cur += 2;
	}
	// Odd length: fold in the last character on its own.
	if (wrdlen & 1)
		acc = (53) * acc + (key[wrdlen-1]);
	return ((acc>>16) ^ acc) & 8191;
}

/*
Results for 'HashAlfalfa_HALF':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3550665
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18652315
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '39'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 117,053,918
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,072,259
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 87 must have PEAK = 7 = rounding down of integer (1+lb(87))
Binary-Search-Tree(1st out of 1) with MaxNODEs = 87 has PEAK = 21 and LEAFs = 27
Binary-Search-Tree(1st out of 2) with MaxPEAK = '39' has NODEs = 65 and LEAFs = 18
Binary-Search-Tree(1st out of 4) with MaxLEAFs = 27 has NODEs = 77 and PEAK = 23
*/
// Multiplicative hash with multiplier 31 written as (x<<5)-x, seed 12, unrolled to
// four characters per step; xor-folded and masked to a 13-bit slot number (0..8191).
//   key    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
UINT HashAlfalfa_HALF(const char *key, unsigned int wrdlen)
{
	UINT acc = 12;
	UINT step;
	const char *cur = key;
	unsigned int quads = wrdlen >> 2;
	unsigned int tail = wrdlen & 3;

	// Four characters per iteration; 'step' holds the intermediate 31*acc + char.
	while (quads--) {
		step = ((acc<<5)-acc) + cur[0];
		acc = ((step<<5) - step) + cur[1];
		step = ((acc<<5)-acc) + cur[2];
		acc = ((step<<5) - step) + cur[3];
		cur += 4;
	}
	// Up to three leftover characters.
	while (tail--) {
		acc = ((acc<<5)-acc) + *cur;
		cur++;
	}
	return ((acc>>16) ^ acc) & 8191;
}

/*
Results for 'HashFNV1A_unrolled_Final':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3445337
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 52%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18757643
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '43'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 118,349,998
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 7,997,033
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 89 must have PEAK = 7 = rounding down of integer (1+lb(89))
Binary-Search-Tree(1st out of 1) with MaxNODEs = 89 has PEAK = 28 and LEAFs = 28
Binary-Search-Tree(1st out of 1) with MaxPEAK = '43' has NODEs = 65 and LEAFs = 11
Binary-Search-Tree(1st out of 2) with MaxLEAFs = 28 has NODEs = 78 and PEAK = 24
*/
// FNV-1a flavoured hash with multiplier 31 written as (x<<5)-x, consuming the key
// 4 bytes at a time, then a 2-byte and a 1-byte tail; xor-folded and masked to a
// 13-bit slot number (0..8191).
//   str    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
UINT HashFNV1A_unrolled_Final(char *str, unsigned int wrdlen)
{ 
	unsigned int hash = 2166136261;
	unsigned int mixed;
	char * p = str;

	// Reduce the number of multiplications by taking 4 bytes per step.
	for(; wrdlen >= 4; wrdlen -= 4, p += 4) {
		mixed = hash ^ *(unsigned int*)p;
		hash = (mixed<<5) - mixed;
	}

	// Process the remaining 0..3 bytes.
	if (wrdlen & -2) {
		// Two or three bytes left: take exactly two. The previous form loaded a full
		// 4-byte word and masked it with 0xFFFF, which read beyond the bytes that
		// belong to the key; on little-endian targets the value is the same.
		mixed = hash ^ *(unsigned short*)p;
		hash = (mixed<<5) - mixed;
		p += 2;
	}
	if (wrdlen & 1) {
		mixed = hash ^ *p;
		hash = (mixed<<5) - mixed;
	}

	return ((hash>>16) ^ hash) & 8191;
}

/*
Results for 'Sixtinsensitive':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3531949
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18671031
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '38'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 118,959,016
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,047,983
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 98 must have PEAK = 7 = rounding down of integer (1+lb(98))
Binary-Search-Tree(1st out of 1) with MaxNODEs = 98 has PEAK = 36 and LEAFs = 30
Binary-Search-Tree(1st out of 1) with MaxPEAK = '38' has NODEs = 54 and LEAFs = 11
Binary-Search-Tree(1st out of 2) with MaxLEAFs = 30 has NODEs = 98 and PEAK = 36
*/
// Tuned for lowercase-and-uppercase letters i.e. 26 ASCII symbols 65-90 and 97-122 decimal.
// Hash that keeps only the low 5 bits of every character (so 'A' and 'a' hash alike)
// and packs six characters into one 32-bit word per step; multiplier 31 written as
// (x<<5)-x; xor-folded and masked to a 13-bit slot number (0..8191).
//   str    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
//
// 0x41 = 065 'A' 010 [0 0001]
// 0x5A = 090 'Z' 010 [1 1010]
// 0x61 = 097 'a' 011 [0 0001]
// 0x7A = 122 'z' 011 [1 1010]
UINT Sixtinsensitive(const char *str, unsigned int wrdlen)
{
	UINT acc = 2166136261;
	UINT packed, fifth, sixth, mixed;
	const char *cur = str;

	// Six bytes per step: 48 bits squeezed into 30.
	// After masking, each byte of 'packed' is [5 bits used][3 bits free]; the 5 bits of
	// characters #5 and #6 are dropped into those free 3-bit holes (3 bits + 2 bits each).
	// OR or XOR makes no difference since the target bits are zero.
	while (wrdlen >= 6) {
		packed = (*(DWORD*)(cur+0)&0x1F1F1F1F);
		fifth = (*(cur+4)&0x1F);
		sixth = (*(cur+5)&0x1F);
		packed ^= (fifth&0x07)<<5;            // 5th char, lower 3 bits -> bits 5..7
		packed ^= (fifth&0x18)<<(2+8);        // 5th char, upper 2 bits -> bits 13..14
		packed ^= (sixth&0x07)<<(5+16);       // 6th char, lower 3 bits -> bits 21..23
		packed ^= (sixth&0x18)<<((2+8)+16);   // 6th char, upper 2 bits -> bits 29..30
		mixed = acc ^ packed;
		acc = (mixed<<5) - mixed;
		// Measured for different multipliers:
		//1607:[2118599]
		// 127:[2121081]
		//  31:[2139242]
		//  17:[2150803]
		//   7:[2166336]
		//   5:[2183044]
		//8191:[2200477]
		//   3:[2205095]
		// 257:[2206188]
		cur += 6;
		wrdlen -= 6;
	}
	// Leftover 0..5 characters, one at a time, again using only the low 5 bits.
	while (wrdlen) {
		mixed = acc ^ (*cur&0x1F);
		acc = (mixed<<5) - mixed;
		cur++;
		wrdlen--;
	}
	return ((acc>>16) ^ acc) & 8191;
}

/*
#define FNV1_32_INIT ((UINT)2166136261)
#define FNV1_32_PRIME ((UINT)1709)

#define FNV_32A_OP(hash, octet) \
    (((UINT)(hash) ^ (unsigned char)(octet)) * FNV1_32_PRIME)

#define FNV_32A_OP32(hash, octet) \
    (((UINT)(hash) ^ (UINT)(octet)) * FNV1_32_PRIME)

UINT FNV1A_Hash_WHIZ(const char *str, SIZE_T wrdlen)
{ 

UINT hash32;
const char *p;

hash32 = FNV1_32_INIT;
p=str;

for(; wrdlen >= 4; wrdlen -= 4, p += 4) {
	hash32 = FNV_32A_OP32(hash32, (UINT)*(UINT *)p);
}
	if (wrdlen & -2) {
		hash32 = FNV_32A_OP32(hash32, *(UINT*)p&0xFFFF);
		p++;p++;
	}
	if (wrdlen & 1) 
		hash32 = FNV_32A_OP(hash32, *p);

return hash32 ^ (hash32 >> 16);
}
*/

/*
Results for 'FNV1A_Hash_Jester':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3537352
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18665628
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '37'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 117,243,563
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,063,361
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 87 must have PEAK = 7 = rounding down of integer (1+lb(87))
Binary-Search-Tree(1st out of 2) with MaxNODEs = 87 has PEAK = 27 and LEAFs = 23
Binary-Search-Tree(1st out of 1) with MaxPEAK = '37' has NODEs = 66 and LEAFs = 18
Binary-Search-Tree(1st out of 3) with MaxLEAFs = 27 has NODEs = 84 and PEAK = 27
*/
// FNV-1a variant with multiplier 709607 that consumes two DWORDs per loop iteration,
// then a DWORD, a WORD and a single byte as the remaining length dictates;
// xor-folded and masked to a 13-bit slot number (0..8191).
//   str    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
// Idea comes from Igor Pavlov's 7zCRC, thanks.
UINT FNV1A_Hash_Jester(const char *str, unsigned int wrdlen)
{
	const UINT multiplier = 709607;
	UINT acc = 2166136261;
	const char *cur = str;

	while (wrdlen >= 2*sizeof(DWORD)) {
		acc = (acc ^ *(DWORD *)cur) * multiplier;
		acc = (acc ^ *(DWORD *)(cur+4)) * multiplier;
		cur += 2*sizeof(DWORD);
		wrdlen -= 2*sizeof(DWORD);
	}
	// Remaining length is one of 0,1,2,3,4,5,6,7: peel it off by its set bits.
	if (wrdlen & sizeof(DWORD)) {
		acc = (acc ^ *(DWORD*)cur) * multiplier;
		cur += sizeof(DWORD);
	}
	if (wrdlen & sizeof(WORD)) {
		acc = (acc ^ *(WORD*)cur) * multiplier;
		cur += sizeof(WORD);
	}
	if (wrdlen & 1) {
		acc = (acc ^ *cur) * multiplier;
	}

	return (acc ^ (acc >> 16)) & 8191;
}

/*
Results for 'FNV1A_Hash_Jesteress':
Bytes per second performance: 19,808,709B/s
Words per second performance: 1,679,585W/s
Input File with a list of TEXTual Files: Leprechaun_vs_Wikipedia_LATIN-WORDS.lst
Size of all TEXTual Files: 415,982,896
Word count: 35,271,297 of them 22,202,980 distinct
Number Of Files: 8
Number Of Lines: 35271297
Allocated memory in MB: 1950
Number Of Trees(GREATER THE BETTER): 3537293
Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): 53%
Number Of Hash Collisions(Distinct WORDs - Number Of Trees): 18665687
Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '40'
Total Attempts to Find/Put WORDs into Binary-Search-Trees: 117,526,680
Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): 8,051,512
Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = 89 must have PEAK = 7 = rounding down of integer (1+lb(89))
Binary-Search-Tree(1st out of 1) with MaxNODEs = 89 has PEAK = 25 and LEAFs = 23
Binary-Search-Tree(1st out of 1) with MaxPEAK = '40' has NODEs = 49 and LEAFs = 8
Binary-Search-Tree(1st out of 1) with MaxLEAFs = 28 has NODEs = 72 and PEAK = 21
*/
#define ROL(x, n) (((x) << (n)) | ((x) >> (32-(n))))
// FNV-1a variant with multiplier 709607 that merges two DWORDs (the first rotated
// left by 5) into a single multiplication per loop iteration, then handles a DWORD,
// a WORD and a single byte as the remaining length dictates; xor-folded and masked
// to a 13-bit slot number (0..8191).
//   str    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
// Idea comes from Igor Pavlov's 7zCRC, thanks.
UINT FNV1A_Hash_Jesteress(const char *str, unsigned int wrdlen)
{
	const UINT multiplier = 709607;
	UINT acc = 2166136261;
	const char *cur = str;

	while (wrdlen >= 2*sizeof(DWORD)) {
		acc = (acc ^ (ROL(*(DWORD *)cur,5)^*(DWORD *)(cur+4))) * multiplier;
		cur += 2*sizeof(DWORD);
		wrdlen -= 2*sizeof(DWORD);
	}
	// Remaining length is one of 0,1,2,3,4,5,6,7: peel it off by its set bits.
	if (wrdlen & sizeof(DWORD)) {
		acc = (acc ^ *(DWORD*)cur) * multiplier;
		cur += sizeof(DWORD);
	}
	if (wrdlen & sizeof(WORD)) {
		acc = (acc ^ *(WORD*)cur) * multiplier;
		cur += sizeof(WORD);
	}
	if (wrdlen & 1) {
		acc = (acc ^ *cur) * multiplier;
	}

	return (acc ^ (acc >> 16)) & 8191;
}

// Same mixing as FNV1A_Hash_Jesteress, but the result is masked to HashInBITS bits
// instead of a fixed 13.
//   str    - key bytes; wrdlen of them are read.
//   wrdlen - number of bytes to hash.
// Returns a value in the range 0..(1<<HashInBITS)-1.
// Idea comes from Igor Pavlov's 7zCRC, thanks.
UINT FNV1A_Hash_Jesteress_27bit(const char *str, unsigned int wrdlen)
{
	const UINT multiplier = 709607;
	UINT acc = 2166136261;
	const char *cur = str;

	while (wrdlen >= 2*sizeof(DWORD)) {
		acc = (acc ^ (ROL(*(DWORD *)cur,5)^*(DWORD *)(cur+4))) * multiplier;
		cur += 2*sizeof(DWORD);
		wrdlen -= 2*sizeof(DWORD);
	}
	// Remaining length is one of 0,1,2,3,4,5,6,7: peel it off by its set bits.
	if (wrdlen & sizeof(DWORD)) {
		acc = (acc ^ *(DWORD*)cur) * multiplier;
		cur += sizeof(DWORD);
	}
	if (wrdlen & sizeof(WORD)) {
		acc = (acc ^ *(WORD*)cur) * multiplier;
		cur += sizeof(WORD);
	}
	if (wrdlen & 1) {
		acc = (acc ^ *cur) * multiplier;
	}

	return (acc ^ (acc >> 16)) & ((1<<HashInBITS)-1);
}

/*
UINT NextPowerOfTwo(UINT x) {
	// Henry Warren, "Hacker's Delight", ch. 3.2
	x--;
	x |= (x >> 1);
	x |= (x >> 2);
	x |= (x >> 4);
	x |= (x >> 8);
	x |= (x >> 16);
	return x + 1;
}

UINT NextLog2(UINT x) {
	// Henry Warren, "Hacker's Delight", ch. 5.3
	if(x <= 1) return x;
	x--;
	UINT n = 0;
	UINT y;
	y = x >>16; if(y) {n += 16; x = y;}
	y = x >> 8; if(y) {n +=  8; x = y;}
	y = x >> 4; if(y) {n +=  4; x = y;}
	y = x >> 2; if(y) {n +=  2; x = y;}
	y = x >> 1; if(y) return n + 2;
	return n + x;
}
*/

// The following example code in the C language computes the binary logarithm of an integer, rounded down. [2] The operator '>>' represents 'unsigned right shift'. The rounded-down binary logarithm is identical to the position of the most significant 1 bit.
/**
 * Floor of the base-2 logarithm of a 32 bit integer, i.e. the index of its
 * highest set bit.
 * Returns -1 when n is 0.
 */
int floorLog2(unsigned int n) {
  int pos = 0;
  int shift;

  if (n == 0)
    return -1;

  /* Binary search for the top bit: try to drop 16, 8, 4, 2 and 1 low bits in turn. */
  for (shift = 16; shift >= 1; shift >>= 1) {
    if ((n >> shift) != 0) {
      n >>= shift;
      pos += shift;
    }
  }
  return pos;
}

// QuickSortExternal_4+GB.c [

// Compares two NUL-terminated strings byte by byte as unsigned values.
// Returns -1 if src sorts before dst, 1 if it sorts after, 0 if they are equal.
int strcmpKAZE13 (
        const char * src,
        const char * dst
        )
{
        const unsigned char *left = (const unsigned char *)src;
        const unsigned char *right = (const unsigned char *)dst;

        for (;; ++left, ++right) {
                if (*left != *right)
                        return (*left < *right) ? -1 : 1;
                if (*right == 0)
                        return 0;
        }
}

//#define LongestLineInclusive 51 //31 former, CAUTION: for command line options 'x' and 'y' it cannot be other than 31 [YET]!

// Selects LongestLineInclusive (the longest key, in bytes, that the buffers below are
// sized for) from whichever x-gram build macro is defined.
// The tests are independent #ifdef blocks, not an #elif chain: if more than one of the
// macros is defined the later definition redefines the earlier one, and if none is
// defined LongestLineInclusive stays undefined.
// NOTE(review): presumably exactly one of these macros is set on the compiler command
// line or earlier in the file - confirm.
#ifdef singleton
#define LongestLineInclusive 31
#endif
#ifdef doubleton
#define LongestLineInclusive 41
#endif
#ifdef tripleton
#define LongestLineInclusive 41
#endif
#ifdef quadrupleton
#define LongestLineInclusive 51
#endif
#ifdef quintupleton
#define LongestLineInclusive 61
#endif
#ifdef sextupleton
#define LongestLineInclusive 71
#endif
#ifdef septupleton
#define LongestLineInclusive 81
#endif
#ifdef octupleton
#define LongestLineInclusive 91
#endif
#ifdef nonupleton
#define LongestLineInclusive 101
#endif
#ifdef decupleton
#define LongestLineInclusive 111
#endif

// _ngram_  1  1-31
// _ngram_  2  5-41
// _ngram_  3  9-41
// _ngram_  4 13-51
// _ngram_  5 17-61
// _ngram_  6 21-71
// _ngram_  7 25-81
// _ngram_  8 29-91
// _ngram_  9 33-101
// _ngram_ 10 37-111
// For Leaf of 256bytes LongestLineInclusive should be 256 = 8+8+8+2*(LongestLineInclusive+1+4) or LongestLineInclusive = (256 - (8+8+8) - 2*(1+4))/2 = 111

char FourGramL[LongestLineInclusive+1+4]; // left-hand key buffer: LongestLineInclusive bytes longest x-gram + 1byte NULL + 4bytes COUNTER
char FourGramR[LongestLineInclusive+1+4]; // right-hand key buffer: LongestLineInclusive bytes longest x-gram + 1byte NULL + 4bytes COUNTER
char LEAF[8+8+8+2*(LongestLineInclusive+1+4)]; // 3 pointers (8 bytes each) + 2 keys; 136bytes when LongestLineInclusive is 51
char LEAFNEW[8+8+8+2*(LongestLineInclusive+1+4)]; // same layout as LEAF: 3 pointers + 2 keys
FILE *fp_outRG; // Global - not to burden the extract/compare function with one more parameter
int CompareStringsEndingWith13_EXTERNAL(unsigned long long AtPosition64L, unsigned long long AtPosition64R) {

int i;
unsigned long long *AtPosition64Lpointer=&AtPosition64L;
unsigned long long *AtPosition64Rpointer=&AtPosition64R;

// Caramba: seek and tell report OK but in fact they lie, only setpos works?!?!?!

//#if defined(_WIN32_ENVIRONMENT_)
//_lseeki64( fileno(fp_outRG), AtPosition64L, 0 );
//#else
//fseeko( fp_outRG, AtPosition64L, SEEK_SET );
//#endif /* defined(_WIN32_ENVIRONMENT_)  */

//  _CRTIMP __int64 __cdecl _telli64(int);
//  off64_t ftello64 (FILE *stream)  


fsetpos(fp_outRG, AtPosition64Lpointer);
	for (i=0; i<(LongestLineInclusive+1+4); i++) {fread(&FourGramL[i], 1, 1, fp_outRG); if (FourGramL[i]==13-13) break;}
	//Commented line below is slower than the one above: 778156 clocks vs 756297 clocks.
	//fread(&FourGramL[0], 31+1, 1, fp_outRG);

fsetpos(fp_outRG, AtPosition64Rpointer);
	for (i=0; i<(LongestLineInclusive+1+4); i++) {fread(&FourGramR[i], 1, 1, fp_outRG); if (FourGramR[i]==13-13) break;}
	//Commented line below is slower than the one above: 778156 clocks vs 756297 clocks.
	//fread(&FourGramR[0], 31+1, 1, fp_outRG);

return(strcmpKAZE13(FourGramL, FourGramR));
}

// Compares two NUL-terminated keys stored inside an in-memory pool.
//   AtPosition64L - byte offset of the left key within POOLinternal.
//   AtPosition64R - byte offset of the right key within POOLinternal.
//   POOLinternal  - base address of the pool.
// Each key is copied into the global buffers FourGramL / FourGramR
// (at most LongestLineInclusive+1+4 bytes) and the two are compared with strcmpKAZE13.
// Returns -1, 0 or 1.
int CompareStringsEndingWith13_INTERNAL(unsigned long long AtPosition64L, unsigned long long AtPosition64R, char *POOLinternal) {

int i;

// The source offset must advance with i; without '+ i' the first byte of the key was
// copied over and over and the buffer never received its terminator.
for (i=0; i<(LongestLineInclusive+1+4); i++) {
	FourGramL[i] = *(char *)(POOLinternal + AtPosition64L + i);
	if (FourGramL[i]==13-13) break;
}
if (i==(LongestLineInclusive+1+4)) FourGramL[i-1]=13-13; // no NUL found: force termination

for (i=0; i<(LongestLineInclusive+1+4); i++) {
	FourGramR[i] = *(char *)(POOLinternal + AtPosition64R + i);
	if (FourGramR[i]==13-13) break;
}
if (i==(LongestLineInclusive+1+4)) FourGramR[i-1]=13-13; // no NUL found: force termination

return(strcmpKAZE13(FourGramL, FourGramR));
}

// QuickSortExternal_4+GB.c ]


int main( argc, argv )
int argc; char *argv[];
{
   int nlines;
   string *backup = NULL;

      FILE *fp_in, *fp_out, *fp_outLOG, *fp_inLINE;
      int LetterOffset;
      unsigned long long FilesLEN;
      unsigned long long WORDcount; 
      unsigned long long WORDcountBOTTOM; 
      unsigned long long WORDcountAttemptsToPut; 
      int Thunderwith;
        unsigned long NumberOfFiles, WORDcountDistinct, WORDcountDistinctTOTAL = 0, TotalMemoryNeededForOnePass = 0;
        unsigned long long NumberOfLines;  // rev. 12+
      unsigned long WHOLEletter_BufferSize;
      unsigned long long WHOLEletter_BufferSize_L14;
        unsigned long memory_size, LetterBuffer, j, k, LINE10len, wrdlen;
        unsigned long k_FIX;
        unsigned long long i;              // rev. 12+
        //unsigned long size_in, size_out, size_inLINE;
        unsigned long size_in;           // rev. 12+
#if defined(_WIN32_ENVIRONMENT_)
      unsigned long long size_inLINESIXFOUR;
#else
      size_t size_inLINESIXFOUR;
#endif /* defined(_WIN32_ENVIRONMENT_)  */

      //unsigned long t1, t2, t3;
      time_t t1, t2, t3, t4,tMainB,tMainE;

      const int NumberOfSLOTs = 4096*2; // Since r.12+ in rev.12 it was 4096
  unsigned long StackPtr;
  //unsigned long BSTstack [65536*3]; // BST in worst case could become a LL.
  unsigned long long BSTstack [8192*3]; // BST in worst case could become a LL.
unsigned long NumberOfTrees=0, NumberOfHashCollisions=0;
unsigned long iBSTwithMAXpeak, jBSTwithMAXpeak;
  unsigned int PEAKibBST;
  unsigned long BSTsTotalLEAFs=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXnode=0, BSTcurrentNode=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long                   BSTcurrentNodeMAXqUANTITY=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXnodePEAK=1, BSTwithMAXnodeLEAF=1; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXpeak=0, BSTcurrentPeak=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long                   BSTcurrentPeakMAX=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long                   BSTcurrentPeakMAXqUANTITY=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXpeakNODE=1, BSTwithMAXpeakLEAF=1; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXleaf=0, BSTcurrentLeaf=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long                   BSTcurrentLeafMAXqUANTITY=0; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!
  unsigned long BSTwithMAXleafNODE=1, BSTwithMAXleafPEAK=1; // ?! MADHOUSE: if BSTcurrent is not zeroed here then INSANE values of BSTcurrent occur below where 'break' is ?!

      char *pointerflush, *pointerflushUNALIGN, *BufStart, *Flushing;
      char *pointerflush_64, *pointerflushUNALIGN_64; // r.14++
      unsigned long PseudoLinkedPointer, PseudoLinkedPointerNEW, PseudoLinkedPointerROOT, PseudoLinkedPointerNEWold;
      unsigned long PseudoLinkedPointerNEWleft, PseudoLinkedPointerNEWright;
      unsigned long PseudoLinkedPointerNEWmiddle;
      char *bufend[ 806 ]; // 'a'=0, ... 'z'=25 - 26 letters x 31 lengths
      long bufNumberOfWords[ 806 ]; // 'a'=0, ... 'z'=25 - 26 letters x 31 lengths
      // long bufNoWpS[ 806 ][ 8192 ]; // ?! crashes below when an attempt to use it occur
      char wrd[LongestLineInclusive+1+4];  // 0..30, 31 = 0
      char wrdUP[LongestLineInclusive+1+4];  // 0..30, 31 = 0
      char wrdUPold[LongestLineInclusive+1+4];  // 0..30, 31 = 0
      char LINE10[257]; // 000..255, 256 = 0
      char ZEROS[4]; // 0..3, 0 = 0, 1 = 0, 2 = 0, 3 = 0
      char CRdLFa[2];  // 0..1, 0 = 13, 1 = 10
      char workbyte;
      char workK[1024*128];
      long workKoffset = -1;
      int FoundInLinkedList, Slot;
      unsigned long OffsetsInBuffer[31]; // 00..30
      unsigned long MAXusedBuffer[32]; // 00 not used, only 01..31
      unsigned long GRMBLhill[32]; // 00..31
      unsigned long GRMBLFoolAgain[32]; // 00..31
      int Melnitchka;
      unsigned long MAXusedBufferABS = 0;
      unsigned long Utiliza1 = 0;
      unsigned long Utiliza2 = 0;
      unsigned long TotalWLchars = 0;

/* minimum signed 64 bit value */
#define _I64_MIN    (-9223372036854775807i64 - 1)
/* maximum signed 64 bit value */
#define _I64_MAX      9223372036854775807i64
/* maximum unsigned 64 bit value */
#define _UI64_MAX     0xffffffffffffffffui64

/* minimum signed 128 bit value */
#define _I128_MIN   (-170141183460469231731687303715884105727i128 - 1)
/* maximum signed 128 bit value */
#define _I128_MAX     170141183460469231731687303715884105727i128
/* maximum unsigned 128 bit value */
#define _UI128_MAX    0xffffffffffffffffffffffffffffffffui128

      char llTOaDigits[27]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)
      // below duplicates are needed because of one_line_invoking need different buffers.
      char llTOaDigits2[27]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)
      char llTOaDigits3[27]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)
      char llTOaDigits4[27]; // 9,223,372,036,854,775,807: 1(sign or carry)+19(digits)+1('\0')+6(,)
      unsigned long HEADOffetFromStartBUKVA = 0;
      unsigned long TAILOffetFromStartBUKVA = 0;
      int BSTorBtree = 0;
      int SplitOccured;
      int POffsetInLEAF;
char *Auberge[4] = {"|\0","/\0","-\0","\\\0"};
int hashAlfalfa, iAlfalfa;
int PLE_words=0; // Quadruple!
char wrd1st[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd2nd[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd3rd[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd4th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd5th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd6th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd7th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd8th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd9th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char wrd10th[LongestLineInclusive+1+4];  // 0..30, 31 = 0
char *DelimiterUnderscore = "_\0";
int PLE_words_INITflag = 0;

// QuickSortExternal_4+GB [
unsigned long long ThunderwithL64_L14;
unsigned long long Strnglen64_L14;
unsigned long long size_in64_L14, size_in2_L14;
unsigned long long Over4billionLines, j_Over4billion;
char OneChar_ieByte = '\0';
char CR_ieByte = '\r';
char SomeByte;
unsigned long long BufEnd_64;
unsigned long long SeekPosition;
unsigned long long *PointerToSeekPosition;
char FourGram[LongestLineInclusive+2]; // 31 longest 4-gram + CR + LF
char *PoolPhysical;
unsigned long long fsetpos_ZERO=0;
char OneCkusterZEROES[1024*4]; // Caution: must be ZEROed(NULLified)!
char *FileSwapTag = "LEPRECHAUNISH";
char EOFcode = 0x1A;
unsigned long long PseudoLinkedPointer_64, PseudoLinkedPointerNEW_64, PseudoLinkedPointerROOT_64, PseudoLinkedPointerNEWold_64;
      unsigned long long PseudoLinkedPointerNEWleft_64, PseudoLinkedPointerNEWright_64;
      unsigned long long PseudoLinkedPointerNEWmiddle_64;
      unsigned long long NULLs_64 = 0;
unsigned long long PseudoLinkedPointerAUX_64;
unsigned long long PseudoLinkedPointerAUXdumbo_64;
      char wrdAUX[LongestLineInclusive+1+4];  // 0..30, 31 = 0
// QuickSortExternal_4+GB ]

unsigned long CounterOccurrencies;
unsigned long long NumberOfLEAFs=0;
unsigned long LevelsInCorona_Not_Counting_ROOT=0;
char *ngram[11] = {"NULLleton\0","singleton\0","doubleton\0","tripleton\0","quadrupleton\0","quintupleton\0","sextupleton\0","septupleton\0","octupleton\0","nonupleton\0","decupleton\0"};

unsigned long RipPasses;
unsigned long long NULLsForWRD=0;

//15+
int DoNotInsertFlag = 0;
int METACOMMANDFlag;

//16
int REUSE=0;
int HSHexist;
int SWPexist;

// INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT INIT
printf ("Leprechaun_%s (Fast-In-Future Greedy n-gram-Ripper), rev. 16FIX, written by Svalqyatchx.\n", ngram[_ngram_]);
//puts( "Leprechaun: 'Oh, well, didn't you hear? Bigger is good, but jumbo is dear.'" );

#ifdef singleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 1..31 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef doubleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 5..41 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef tripleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 9..41 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef quadrupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 13..51 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef quintupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 17..61 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef sextupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 21..71 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef septupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 25..81 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef octupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 29..91 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef nonupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 33..101 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
#ifdef decupleton
printf ("Purpose: Rips all distinct %d-grams (%d-word phrases) with length 37..111 chars from incoming texts.\n", _ngram_, _ngram_);
#endif
puts( "Feature1: All words within x-lets/n-grams are in range 1..31 chars inclusive." );
//puts( "Feature2: In this revision 128MB 1-way hash is used which results in 16,777,216 external B-Trees of order 3." );

if (HashInBITS+3<10)
printf ("Feature2: In this revision %sbytes 1-way hash is used which results in %s external B-Trees of order 3.\n", _ui64toaKAZEcomma(((1<<HashInBITS)<<3), llTOaDigits, 10), _ui64toaKAZEcomma((1<<HashInBITS), llTOaDigits2, 10) );
else if (HashInBITS+3>=10 && HashInBITS+3<20)
printf ("Feature2: In this revision %sKB 1-way hash is used which results in %s external B-Trees of order 3.\n", _ui64toaKAZEcomma( (((1<<HashInBITS)<<3))>>10, llTOaDigits, 10), _ui64toaKAZEcomma((1<<HashInBITS), llTOaDigits2, 10) );
else
printf ("Feature2: In this revision %sMB 1-way hash is used which results in %s external B-Trees of order 3.\n", _ui64toaKAZEcomma( (((1<<HashInBITS)<<3))>>20, llTOaDigits, 10), _ui64toaKAZEcomma((1<<HashInBITS), llTOaDigits2, 10) );
if (HashInBITS-HashChunkSizeInBITS==0)
printf ("Feature3: In this revision %s pass is to be made.\n", _ui64toaKAZEcomma(1<<(HashInBITS-HashChunkSizeInBITS), llTOaDigits, 10));
else
printf ("Feature3: In this revision %s passes are to be made.\n", _ui64toaKAZEcomma(1<<(HashInBITS-HashChunkSizeInBITS), llTOaDigits, 10));

puts( "Feature4: If the external memory has latency 99+microseconds then !(look no further), IOPS(seek-time) rules." );
// The phrase 'look no further' was used in amazon.com review meaning 'stop searching for better thing this is it'.
//puts( "Kaze: Let's see what a 3-way hash + 6,602,752 Binary-Search-Trees can give us," );
//puts( "      also the performance of a 3-way hash + 6,602,752 B-Trees of order 3," );
//puts( "      also the performance of a 1-way hash + 134,217,728 external B-Trees of order 3." );
//puts( "Note1: Compiled with Microsoft C v. 13.10.3077: 'cl /Ox /TcLeprechaun.c'." );
//puts( "Note2: This WORDLISTER makes as output pseudo(unsorted)_wordlist_CRLF_file." );
if( argc != 3 && argc != 4 && argc != 5 && argc != 6 ) // +1 for program name
{
  puts( "" );
puts( "'The Little Monster' short notes:" );
puts( "Note1: I wish to thank to R.N. Horspool, Ranjan Sinha, Dmitry Shkarin," );
puts( "       Michael Abrash, J. Bentley, R. Sedgewick, Igor Pavlov, Lasse Reinhold," );
puts( "       Landon Noll, Peter Kankowski for sharing their knowledge to public." );
puts( "Note2: Run it without parameters to get usage and short notes." );
puts( "Note3: This simple amateurish(more over I am not versed well neither in C nor" );
puts( "       in mathematics nor in English language, but I am persistent in INDEXING" );
puts( "       GBs of english TEXTS) tool is written in ANSI C(at least its source is" );
puts( "       compileable for CL(Windows) and GCC(Linux)), and its purpose is to" );
puts( "       create a WordList for a group of files(given via filelist)." );
puts( "       Its name comes(according to Heritage Dictionary) from 'low corpus' or" );
puts( "       'little body', in fact from amazing movie saga 'Leprechaun 1-2-3-4-5-6'" );
puts( "       starring by Warwick Davis." );
puts( "Note4: Only words up to 31 chars are proceeded - the reason is 'DDT'(the" );
puts( "       longest word in Heritage Dictionary 3rd edition) or" );
puts( "       'dichlorodiphenyltrichloroethane'." );
puts( "Note5: Cursor hiding in C - mission impossible for me." );
puts( "Note6: By default(third parameter is 1023) allocated memory is 393MB." );
puts( "       Due to 'malloc()' limitation under WINDOWS, maximum value of third" );
puts( "       parameter is 5174 which is 1988MB allocated block." );
puts( "Note7: File Leprechaun.LOG is a log, where new statistics are appended." );
puts( "Note8: Revision 12+ can handle files larger than 4GB." );
puts( "Note9: Revision 12++ has a buffered 'fread()' - therefore I/O READ-BURST SPEED" );
puts( "       is the first(worst) bottleneck, as a result r.12++ is much-much faster;" );
puts( "       the second(worse) bottleneck: the linked lists - the b-trees" );
puts( "       might be the answer; the third(bad) bottleneck: the amateurish author." );
puts( "NoteA: Revision 12+++ has an improved(2 bits were used doltishly) main hash" );
puts( "       function - therefore less collisions, for example:" );
puts( "       for file 'wikipedia-de-html.tar' 42,291,855,360 bytes with" );
puts( "       5,750,179,678 words of them 7,375,373 distinct attempts to Find/Put" );
puts( "       a WORD into a linked list are 6,117,675,470(r.12++) and 5,845,989,790" );
puts( "       (r.12+++); also two 'if' sections were moved because they were executed" );
puts( "       unnecessarily many times." );
puts( "NoteB: Revision 13 uses BSTs instead of LLs, that is Linked-Lists were" );
puts( "       replaced by Binary-Search-Trees, as a result for 22,202,980 distinct" );
puts( "       words(out of 35,271,297) r.12+++ needs 225,548,268 total attempts to" );
puts( "       Find/Put WORDs into linked lists where r.13 needs 121,674,042 total" );
puts( "       attempts to Find/Put WORDs into Binary-Search-Trees. But this is a" );
puts( "       significant boost in performance only for wordlists of million words." );
puts( "NoteC: Revision 13+ gives only more statistics. Future revisions could lessen" );
puts( "       number of attempts to Find/Put WORDs into Binary-Search-Trees" );
puts( "       furthermore by making them at some point Perfectly-Balanced. But" );
puts( "       for huge amount(multi-(m|b)illion) of distinct words the b-tree family" );
puts( "       must come in, until then this is the leprechaunish niche." );
puts( "NoteD: Revision 13++ has a little fix(2 unnecessary ZEROings, when a new word" );
puts( "       is inserted, were deleted) and a fixed bug(13+ adds stupidly the" );
puts( "       highest BST to the wordlist). Also B-Tree of order 3 is added as a" );
puts( "       searching method. Main goal of B-Tree is to reduce number of" );
puts( "       comparisons but at nasty cost: a precious time wasted to construct it" );
puts( "       and twice more memory, i.e. one step forward two backward: this tree is" );
puts( "       more effective than BST in cases of 2++ billion/million" );
puts( "       different/distinct words." );
puts( "       The improvement which comes from using B-Tree of order 3 is about 200%" );
puts( "       much more pleasing than I expected, for wikipedia-en-html.tar.wrd with" );
puts( "       12,561,874 distinct words Total Attempts to Find/Put WORDs into:" );
puts( "       Binary-Search-Trees was 61,895,043 while for" );
puts( "       B-trees order 3 was 19,295,791." );
puts( "NoteE: Revision 13+++ has a faster(not heavily tested yet) and with" );
puts( "       better(0.6% to 1.1%) dispersion Fowler/Noll/Vo hash," );
puts( "       so called FNV1a hash. Revision 13+++++ boosting: Leprechaun_Intel.exe" );
puts( "       gives 1,256,187W/s for wikipedia-en-html.tar.wrd with FNV1_32_PRIME:" );
puts( "       107712257 with 3,551,736 dispersion for 'FNV1A_Hash_Granularity'." );
puts( "NoteF: For old r.12+ a USB connected HDD crippled test:" );
puts( "       for 'H:\\>Leprechaun.exe static.wikipedia.org_downloads_2008-06_en.lst" );
puts( "       wikipedia-en-html.tar.wrd 5400'" );
puts( "       where 223,674,511,360 wikipedia-en-html.tar" );
puts( "       on laptop Toshiba Pentium T3400 2166 MHz with" );
puts( "       Motherboard Name:                             Toshiba Satellite L305" );
puts( "       CPU Type:         Mobile DualCore Intel Pentium, 2166 MHz (13 x 167)" );  
puts( "       CPU Alias:                                                  Merom-1M" );  
puts( "       L1 Code Cache:                                        32 KB per core" );  
puts( "       L1 Data Cache:                                        32 KB per core" );  
puts( "       L2 Cache:                        1 MB (On-Die, ECC, ASC, Full-Speed)" );  
puts( "       Bus Type:                                            Dual DDR2 SDRAM" );  
puts( "       Bus Width:                                                   128-bit" );  
puts( "       Real Clock:                                            333 MHz (DDR)" );  
puts( "       Effective Clock:                                             666 MHz" );
puts( "       EVEREST v5.00.1650 Memory Copy:       3725MB/s with timings 5-5-5-13" );
puts( "       result is logged to 'Leprechaun.LOG':" );
puts( "  Bytes per second performance: 20,658,955B/s" );
puts( "  Words per second performance: 2,860,880W/s" );
puts( "  Input File with a list of TEXTual Files:" );
puts( "   static.wikipedia.org_downloads_2008-06_en.lst" );
puts( "  Size of all TEXTual Files: 223,674,511,360" );
puts( "  Word count: 30,974,750,142 of them 12,561,874 distinct" );
puts( "  Number Of Files: 1" );
puts( "  Number Of Lines: 2088618575" );
puts( "  Allocated memory in MB: 1920" );
puts( "  Words with length 01 occupy 0,033KB of 0,349KB given i.e. 09% utilization" );
puts( "  Words with length 02 occupy 0,033KB of 0,349KB given i.e. 09% utilization" );
puts( "  Words with length 03 occupy 0,037KB of 0,697KB given i.e. 05% utilization" );
puts( "  Words with length 04 occupy 0,151KB of 0,871KB given i.e. 17% utilization" );
puts( "  Words with length 05 occupy 0,744KB of 1,568KB given i.e. 47% utilization" );
puts( "  Words with length 06 occupy 1,470KB of 3,136KB given i.e. 46% utilization" );
puts( "  Words with length 07 occupy 2,605KB of 5,923KB given i.e. 43% utilization" );
puts( "  Words with length 08 occupy 3,296KB of 6,968KB given i.e. 47% utilization" );
puts( "  Words with length 09 occupy 3,714KB of 6,968KB given i.e. 53% utilization" );
puts( "  Words with length 10 occupy 3,483KB of 6,968KB given i.e. 49% utilization" );
puts( "  Words with length 11 occupy 3,235KB of 5,923KB given i.e. 54% utilization" );
puts( "  Words with length 12 occupy 2,691KB of 4,181KB given i.e. 64% utilization" );
puts( "  Words with length 13 occupy 2,230KB of 3,484KB given i.e. 64% utilization" );
puts( "  Words with length 14 occupy 1,718KB of 3,484KB given i.e. 49% utilization" );
puts( "  Words with length 15 occupy 1,357KB of 2,613KB given i.e. 51% utilization" );
puts( "  Words with length 16 occupy 1,063KB of 2,613KB given i.e. 40% utilization" );
puts( "  Words with length 17 occupy 0,814KB of 1,742KB given i.e. 46% utilization" );
puts( "  Words with length 18 occupy 0,617KB of 1,742KB given i.e. 35% utilization" );
puts( "  Words with length 19 occupy 0,485KB of 1,742KB given i.e. 27% utilization" );
puts( "  Words with length 20 occupy 0,402KB of 1,742KB given i.e. 23% utilization" );
puts( "  Words with length 21 occupy 0,327KB of 1,742KB given i.e. 18% utilization" );
puts( "  Words with length 22 occupy 0,274KB of 1,742KB given i.e. 15% utilization" );
puts( "  Words with length 23 occupy 0,224KB of 1,394KB given i.e. 16% utilization" );
puts( "  Words with length 24 occupy 0,190KB of 1,394KB given i.e. 13% utilization" );
puts( "  Words with length 25 occupy 0,162KB of 1,394KB given i.e. 11% utilization" );
puts( "  Words with length 26 occupy 0,136KB of 1,220KB given i.e. 11% utilization" );
puts( "  Words with length 27 occupy 0,119KB of 1,046KB given i.e. 11% utilization" );
puts( "  Words with length 28 occupy 0,107KB of 0,871KB given i.e. 12% utilization" );
puts( "  Words with length 29 occupy 0,091KB of 0,697KB given i.e. 13% utilization" );
puts( "  Words with length 30 occupy 0,080KB of 0,523KB given i.e. 15% utilization" );
puts( "  Words with length 31 occupy 0,076KB of 0,523KB given i.e. 14% utilization" );
puts( "  Total pseudo(including hash table) memory utilization: 42%" );
puts( "  Total real(wordlist's words VS allocated block) memory utilization: 60/1000" );
puts( "  Used value for third parameter in KB: 5400" );
puts( "  Use next time as third parameter: 3475-" );
puts( "  Time for making unsorted wordlist: 10827 second(s)" );
puts( "  Time for sorting unsorted wordlist: 10 second(s)" );
puts( "NoteG: 2011-Mar-07: Fixed a small command line parsing bug." );
puts( "NoteH: A heavy blow for my illusions(regarding speed performance of external b-trees)," );
puts( "       desperate results for ripping on HDD 7200rpm:" );
puts( "       20,000,000 distinct 4-grams per 5 hours." );
puts( "       D:\\Leprechaun_quadrupleton_r14_minus>Leprechaun_quadrupleton GRAFFITH_2048.lst GRAFFITH_2048.wrd 48000000 z" );
puts( "       Leprechaun(Fast Greedy Word-Ripper), rev. 14_minus_quadrupleton, written by Svalqyatchx." );
puts( "       Leprechaun: 'Oh, well, didn't you hear? Bigger is good, but jumbo is dear.'" );
puts( "       Kaze: Let's see what a 3-way hash + 6,602,752 Binary-Search-Trees can give us," );
puts( "             also the performance of a 3-way hash + 6,602,752 B-Trees of order 3," );
puts( "             also the performance of a 1-way hash + 134,217,728 external B-Trees of order 3." );
puts( "       Size of input file with files for Leprechauning: 42140" );
puts( "       Allocating HASH memory 1,073,741,889 bytes ... OK" );
puts( "       Allocating/ZEROing 49,152,000,014 bytes swap file ... OK" );
puts( "       Size of Input TEXTual file: 33,470,581" );
puts( "       |; Word count: 3,045,077 of them 2,597,942 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 17,229,900" );
puts( "       -; Word count: 4,235,032 of them 3,588,757 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 19,191,256" );
puts( "       |; Word count: 5,803,400 of them 4,866,213 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 34,651,077" );
puts( "       \\; Word count: 8,714,961 of them 6,941,108 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 26,875,458" );
puts( "       /; Word count: 11,022,830 of them 8,579,931 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 19,605,129" );
puts( "       -; Word count: 12,924,821 of them 10,078,191 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 17,053,521" );
puts( "       /; Word count: 14,577,010 of them 11,455,983 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 44,087,709" );
puts( "       -; Word count: 18,953,280 of them 15,010,569 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 32,796,705" );
puts( "       |; Word count: 22,412,912 of them 17,621,649 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 19,538,360" );
puts( "       /; Word count: 24,381,005 of them 19,137,701 distinct; Done: 64/64" );
puts( "       Size of Input TEXTual file: 29,565,366" );
puts( "       \\; Word count: 26,214,400 of them 20,528,357 distinct; Done: 40/64" );
puts( "       ..." );
puts( "NoteI: In revision 14- the resultant wordlist is NOT sorted when 'Z' is used." );
puts( "NoteJ: In revision 14 'x' and 'y' options are disabled, for 7++ million phrases their usefulness is no more," );
puts( "       the real loads are of order 800+ million, too many limitations exist, they must be rewritten as 64bit." );
puts( "NoteK: Ripping OSHO.TXT (10,165,640 4-grams) on HDD daunts because of 6+hours needed:" );
puts( "       Number Of Trees(GREATER THE BETTER): 9,433,894" );
puts( "       Used value for third parameter in KB: 3,145,728" );
puts( "       Use next time as third parameter: 1,262,186" );
puts( "       One leaf has size: 8+8+8+(51+1+4)+(51+1+4)=136bytes," );
puts( "       or MAX (one 4-gram per leaf) 10,165,640*136=1,382,527,040bytes." );
puts( "NoteL: Each phrase in extracted file is preceded by TAB ASCII code, this (TAB being a delimiter symbol) allows" );
puts( "       the phrase-list to be ripped again i.e. to treat already ripped files as any other text." );
puts( "NoteM: Too many 'fsetpos', 'fread', 'fwrite' invocations were put in the straight port (from 32bit internal memory to" );
puts( "       64bit external memory), a optimization is needed, something like reading/writing a LEAF at once." );
puts( "NoteN: Since revision 14+: Optimized(LEAFwise) search (fragment 1] and 2]), insert (fragment 3]) and dump." );
puts( "NoteO: In next revisions a 2in1 is to be done i.e. one code fragment will deal with virtual and physical memory," );
puts( "       thus establishing pure 64bit mode of operation, a single flag will decide whether 'memcpy' or" );
puts( "       the slow I/O triad sub-fragments will be used. DONE." );
puts( "NoteP: In next revisions a multi-pass (by chunking the hash table) mode is to be added in order to avoid" );
puts( "       these sick-seeks. DONE." );
puts( "NoteQ: Fixed occurrencies bug due to not NULLifying the field housing the occurrencies, a nasty thing: all" );
puts( "       the revisions 14??? were buggy, how stupid from my side, grumble." );
puts( "NoteR: In r.14+++++FIXFIX were fixed STATS(Leprechaun.LOG) bugs (appearing only in multi-pass mode) due to not" );
puts( "       NULLifying the variables housing the stats, they do not affect the results - they are for informative use." );
puts( "NoteS: Fixed a division-by-zero bug, occurs when finishing-starting time is under 1 second." );
puts( "       Fixed a nasty bug causing very restrictive way of forming x-grams." );
puts( "NoteT: At last and finally the nasty bug causing very restrictive way of forming x-grams was REALLY fixed - lack of" );
puts( "       calmness jammed (again) my actions - a lesson to be relearnt." );

puts( "NoteU: Since r.15FIXFIX+ the ability to command Leprechaun (from inside the list file with 2 metacommands) to enter/exit" );
puts( "       INSERT mode was added. This allows to control whether new (to current hash-tree structure) x-grams are to be counted" );
puts( "       [and] INSERTed. These two metacommands are:" );
puts( "       Leprechaun says x-gram inserting disabled for next files: ON" );
puts( "       Leprechaun says x-gram inserting disabled for next files: OFF" );

puts( "NoteV: When W/w option is used multiple-passes shouldn't be dumped - it is meaningless, dump when only one pass," );
puts( "       that is, use W/w only in ONE-PASS mode otherwise it behaves as Z/z but DOES NOT dump to OutFile." );
puts( "       It uses in READ mode the two HASH+TREES output files: 'Leprechaun_64bit.hsh' and 'Leprechaun_64bit.swp'." );
puts( "       If during the start one of them is missing then Z/z behaviour is on, at end 'Leprechaun_64bit.hsh' is dumped." );
puts( "       Also the OutFile has all incoming x-grams which are present in the corpus (i.e. HASH+TREES structure)." );
  puts( "" );
  puts( "Usage: Leprechaun InFile OutFile [BufferSize] [SortMethod] [TreeMethod]" );
  puts( "      <InFile>: Input file with files for Leprechauning, in WINDOWS console" );
  puts( "                you can create it by 'E:\\KAZEHOME>dir *.txt/s/b>Leprechaun.lst'" );
  puts( "      <OutFile>: Output WORDLIST(sorted since r.9, CRLF) file" );
  puts( "      <BufferSize>: Optional Dynamic RAM buffer in KB, default(and minimum" );
  puts( "                    in the same time) is 1023, i.e. omit or specify greater one" );
  puts( "      <SortMethod>: Optional Sort Method, default is 'D'," );
  puts( "                    A - InsertionSort" );
  puts( "                    B - InsertionX26Sort" );
  puts( "                    C - MultiKeyQuickSortSort by J. Bentley, R. Sedgewick" );
  puts( "                    D - MultiKeyQuickSortX26Sort' by J. Bentley, R. Sedgewick" );
  puts( "      <TreeMethod>: Optional Tree Method, default is 'X'," );
  puts( "                    X - Binary-Search-Trees" );
  puts( "                    y - B-Trees of order 3, INTERNAL/fast memory digitless i.e. no repetitions, 64bit addressing!" );
  puts( "                    Y - B-Trees of order 3, INTERNAL/fast memory, 64bit addressing!" );
  puts( "                    z - B-Trees of order 3, EXTERNAL/slow memory digitless i.e. no repetitions, 64bit addressing!" );
  puts( "                    Z - B-Trees of order 3, EXTERNAL/slow memory, 64bit addressing!" );
  puts( "                    w - B-Trees of order 3, EXTERNAL/slow memory digitless i.e. no repetitions, 64bit addressing! REUSE!" );
  puts( "                    W - B-Trees of order 3, EXTERNAL/slow memory, 64bit addressing! REUSE!" );
  puts( "" );
puts( "Have a nice Leprechauning." );
puts( "For contacts: sanmayce@sanmayce.com" );
puts( "Sanmayce Svalqyatchx 'Kaze', 2005 Feb 07. Last revision: 2012 Dec 16." );
  return( 1 );
}

  GRMBLhill[0]=0;
  GRMBLhill[1]=1;
  GRMBLhill[2]=1;
  GRMBLhill[3]=1;
  GRMBLhill[4]=1;
  GRMBLhill[5]=1;
  GRMBLhill[6]=1;
  GRMBLhill[7]=1;
  GRMBLhill[8]=1;
  GRMBLhill[9]=1;
  GRMBLhill[10]=1;
  GRMBLhill[11]=1;
  GRMBLhill[12]=15;
  GRMBLhill[13]=15;
  GRMBLhill[14]=15;
  GRMBLhill[15]=20;
  GRMBLhill[16]=30;
  GRMBLhill[17]=40;
  GRMBLhill[18]=50;
  GRMBLhill[19]=50;
  GRMBLhill[20]=50;
  GRMBLhill[21]=40;
  GRMBLhill[22]=40;
  GRMBLhill[23]=40;
  GRMBLhill[24]=30;
  GRMBLhill[25]=20;
  GRMBLhill[26]=20;
  GRMBLhill[27]=20;
  GRMBLhill[28]=20;
  GRMBLhill[29]=20;
  GRMBLhill[30]=10;
  GRMBLhill[31]=10;

(void) time(&tMainB);

if( ( fp_out = fopen( argv[2], "wb+" ) ) == NULL )
{ printf( "Leprechaun: Can't create file %s \n", argv[2] ); return( 1 ); }
fclose(fp_out); // The file must be with size 0 because it is opened for appending down below.

// 2^(HashInBITS-HashChunkSizeInBITS)=2^0=1 passe(s).
// 14++++ [
//for( RipPasses = 1-1; RipPasses <= (1<<(HashInBITS-HashChunkSizeInBITS))-1; RipPasses++ )
//{
// 14++++ ]
RipPasses = 1-1;
WhyTheHellForIsNotWorking:
printf( "Pass #%lu of %lu:\n", RipPasses+1, (1<<(HashInBITS-HashChunkSizeInBITS)));

if( ( fp_in = fopen( argv[1], "rb" ) ) == NULL )
{ printf( "Leprechaun: Can't open file %s \n", argv[1] ); return( 1 ); }

fseek( fp_in, 0L, SEEK_END );
size_in = ftell( fp_in );
fseek( fp_in, 0L, SEEK_SET );
printf( "Size of input file with files for Leprechauning: %lu\n", size_in );

if( ( fp_outLOG = fopen( "Leprechaun.LOG", "a+" ) ) == NULL )
{ printf( "Leprechaun: Can't open file Leprechaun.LOG.\n" ); return( 1 ); }

// argc is 4|5|6 due to eventual missing BufferSize
if( argc == 4 ) // not 6 due to eventual missing BufferSize and SortMethod
    k_FIX = 3;
if( argc == 5)  // not 6 due to eventual missing BufferSize or SortMethod
    k_FIX = 4;
if( argc == 6 )  
    k_FIX = 5;
if (*argv[k_FIX] == 'Y' || *argv[k_FIX] == 'y') BSTorBtree = 1+2; // +2 since r.14++
if (*argv[k_FIX] == 'Z' || *argv[k_FIX] == 'z') BSTorBtree = 2;
if (*argv[k_FIX] == 'W' || *argv[k_FIX] == 'w') {BSTorBtree = 2; REUSE=1;}

if( argc == 4 || argc == 5 || argc == 6 ) Thunderwith = atoi( argv[3] );
else Thunderwith = 527; // for r.12:   527=17*31 this is minimum because of 4096*1*4=16KB+ needed for each buffer!
                        // for r.12+: 1023=33*31 this is minimum because of 4096*2*4=32KB+ needed for each buffer!
if (Thunderwith < 1023) {Thunderwith = 1023;}

//printf( "Use next time as third parameter: %s\n", _ui64toaKAZEcomma((25123456789>>10)+1, llTOaDigits, 10) );
//printf( "Use next time as third parameter: %s\n", _ui64toaKAZEcomma((25123456789/1024)+1, llTOaDigits, 10) );

// Only the B-tree methods get past this point: the option parsing above sets BSTorBtree to 2 (Z/z/W/w) or 3 (Y/y).
// NOTE(review): because of this early return, the 'if (BSTorBtree < 2)' branch that follows can never execute.
if (BSTorBtree < 2) { printf( "Leprechaun: In this particular revision 'x' option is disabled.\n" ); return( 1 ); }

if (BSTorBtree < 2) {
LetterBuffer = Thunderwith * 1024;
  WHOLEletter_BufferSize = 0;
  for( i = 1; i <= 31; i++ )
  { OffsetsInBuffer[i-1] = 0;
    for( j = 1; j <= i; j++ )
    { OffsetsInBuffer[i-1] = OffsetsInBuffer[i-1] + (GRMBLhill[(int)(j-1)] * LetterBuffer)/31;
    }
    WHOLEletter_BufferSize = WHOLEletter_BufferSize + (GRMBLhill[(int)i] * LetterBuffer)/31; 
    GRMBLFoolAgain[(int)i] = (GRMBLhill[(int)i] * LetterBuffer)/31;
  }                           
  memory_size = 26 * WHOLEletter_BufferSize + 1 + 64;
printf( "Allocating memory %luMB ... ", (memory_size>>20)+1 );
pointerflushUNALIGN = (char *)malloc( memory_size );
if( pointerflushUNALIGN == NULL )
{ puts( "\nLeprechaun: Needed memory allocation denied!\n" ); return( 1 ); }
pointerflush = pointerflushUNALIGN + 64 - (((size_t)pointerflushUNALIGN) % 64); // 13_6+
//offset=64-int((long)data&63);

printf( "OK\n");
                  fprintf( fp_outLOG, "Leprechaun report:\n" );

// Check once for ever whether allocated memory is ZEROed!? Answer: YES
//for( i = 0; i < memory_size; i++ )
// if (*(char *)(pointerflush+i)!=0) printf("NON-ZERO encountered, so 'NO'.");

for( i = 0; i < 26; i++ )
{ for( k = 1; k <= 31; k++ )
  { bufend[i*31+k-1] = pointerflush + i * WHOLEletter_BufferSize + OffsetsInBuffer[k-1]; // i*31+k-1 must be 0..805
        if (i==25) { MAXusedBuffer[k] = (unsigned long)bufend[i*31+k-1]; }
    for( j = 0; j < (NumberOfSLOTs+1)*4; j++ ) // ? memset(bufend[i],0,(NumberOfSLOTs+1)*4);
    { *bufend[i*31+k-1]++ = 0;
      //++bufend[i*31+k-1];
    }   
        if (i==25) { MAXusedBuffer[k] = (unsigned long)bufend[i*31+k-1]-MAXusedBuffer[k]; }
    bufNumberOfWords[i*31+k-1]=0;
//for( j = 0; j < NumberOfSLOTs; j++ )
//bufNoWpS[i*31+k-1][j]=0;
  }
}

} else { //if (BSTorBtree != 2) {
	// _ ASCII code 095 
	// ` ASCII code 096  \
	// a ASCII code 097  / In total 26+1+1 radix instead of 27 to avoid +1 for each '_', code 096 not used.
	// z ASCII code 122
	// The hash  for 'a_quadruplet_for_example' will be calculated for first 5 chars:
	// = (byte1-'_')*28*28*28*28 + (byte2-'_')*28*28*28 + (byte3-'_')*28*28 + (byte4-'_')*28 + (byte5-'_')
	// Hash slots are 28*28*28*28*28 = 17,210,368 each containing one 64bit pointer i.e. 8bytes in length.
	// Hash size = 17,210,368*8 = 137,682,944 bytes
	// When at end all these slots(17,210,368- Btrees) are traversed the outcome is a sorted wordlist - no need of sorting.
	//unsigned long long SeekPosition;
	//unsigned long long *PointerToSeekPosition;
	// The 64bit external pool will be addressed via fsetpos(fp_outRG, PointerToSeekPosition); similarly to bufend approach from r.13 - that is bufend points to first(always following the last used btree leaf) free position in the pool.
	// For final stats all non-zero slots point to one btree.
//	printf( "Allocating HASH memory %s bytes ... ", _ui64toaKAZEcomma( (17210368*8) + 1 + 64 , llTOaDigits, 10) );
//	pointerflushUNALIGN = (char *)malloc( (17210368*8) + 1 + 64 );
	// Hash slots are 27bit = 2^27 = 134,217,728 each containing one 64bit pointer i.e. 8bytes in length.
	// The hash table holds 2^HashInBITS slots of 8 bytes each (one 64bit pointer per slot); 1+64 spare bytes are allocated on top.
	// The byte count is computed in size_t: '(1<<HashInBITS)*8' is evaluated in int and overflows as soon as HashInBITS reaches 28.
	printf( "Allocating HASH memory %s bytes ... ", _ui64toaKAZEcomma( (((size_t)1<<HashInBITS)*8) + 1 + 64 , llTOaDigits, 10) );
	pointerflushUNALIGN = (char *)malloc( ((size_t)1<<HashInBITS)*8 + 1 + 64 );
	if( pointerflushUNALIGN == NULL )
	{ puts( "\nLeprechaun: Needed memory allocation denied!\n" ); return( 1 ); }
	// r16: the table is used exactly where malloc() put it, the 64-byte alignment step below stays disabled.
	pointerflush = pointerflushUNALIGN;
	//pointerflush = pointerflushUNALIGN + 64 - (((size_t)pointerflushUNALIGN) % 64); // 13_6+
	//offset=64-int((long)data&63);
	printf( "OK\n");
//	memset(pointerflush,0,17210368*8);
	// Every slot starts as 0; only the table proper is cleared, not the spare bytes.
	memset(pointerflush,0,((size_t)1<<HashInBITS)*8);
				if (BSTorBtree == 2) {

	if( ( fp_outRG = fopen( "Leprechaun_64bit.hsh", "rb" ) ) == NULL )
	{ 
		HSHexist=0;
		if ( REUSE && ((HashInBITS-HashChunkSizeInBITS)==0) ) // Multiple-passes shouldn't be uploaded - it is meaningless, dump when only one pass.
			printf( "Leprechaun: Can't find file 'Leprechaun_64bit.hsh'.\n" );
	} else {
		HSHexist=1;
	fclose(fp_outRG);
	}

	if( ( fp_outRG = fopen( "Leprechaun_64bit.swp", "rb" ) ) == NULL )
	{ 
		SWPexist=0;
		if ( REUSE && ((HashInBITS-HashChunkSizeInBITS)==0) )
			printf( "Leprechaun: Can't find file 'Leprechaun_64bit.swp'.\n" );
	} else {
		SWPexist=1;
	fclose(fp_outRG);
	}

	if ( REUSE && ((HashInBITS-HashChunkSizeInBITS)==0) && (SWPexist+HSHexist == 2) ) {
		REUSE=2;
		if( ( fp_out = fopen( argv[2], "wb+" ) ) == NULL )
		{ printf( "Leprechaun: Can't create file %s \n", argv[2] ); return( 1 ); }
	}

	// Upload the saved hash table, but only when reuse has been confirmed (REUSE == 2);
	// in every other case the file is merely opened and closed again.
	if( ( fp_outRG = fopen( "Leprechaun_64bit.hsh", "rb" ) ) != NULL ) {
		if ( REUSE == 2 ) { //  REUSE [

#if defined(_WIN32_ENVIRONMENT_)
   // 64bit:
_lseeki64( fileno(fp_outRG), 0L, SEEK_END );
size_inLINESIXFOUR = _telli64( fileno(fp_outRG) );
_lseeki64( fileno(fp_outRG), 0L, SEEK_SET );
#else
   // 64bit:
fseeko( fp_outRG, 0L, SEEK_END );
size_inLINESIXFOUR = ftello( fp_outRG );
fseeko( fp_outRG, 0L, SEEK_SET );
#endif /* defined(_WIN32_ENVIRONMENT_)  */
printf( "Uploading-n-Reusing 'Leprechaun_64bit.hsh' file: %s bytes\n", _ui64toaKAZEcomma(size_inLINESIXFOUR, llTOaDigits, 10) );

			// Notice that the actual size of .HSH file is not calculated since it won't work if not the same as during the creation.
			// The upload must be successful for the reuse to make sense: when fewer bytes than the table proper
			// (2^HashInBITS slots of 8 bytes) arrive, part of the slots would silently stay empty, so give up instead.
			if( fread( pointerflushUNALIGN, 1, (1<<HashInBITS)*8 + 1 + 64, fp_outRG ) < (size_t)(1<<HashInBITS)*8 )
			{ printf( "Leprechaun: Can't upload file 'Leprechaun_64bit.hsh' - it is shorter than the hash table.\n" ); fclose(fp_outRG); return( 1 ); }
		}
	fclose(fp_outRG);
	}

// Tag for the swap file is: LEPRECHAUNISH{ASCIIcode26}
// or 14bytes, then when type of the swap is requested:
// D:\_KAZE_~1\LEPREC~1>type Leprechaun_64bit.swp
// LEPRECHAUNISH
// D:\_KAZE_~1\LEPREC~1>
	size_in64_L14 = 1024 * (unsigned long long)Thunderwith + 14;
BufEnd_64 = 0+14;
// The tag plays two roles, the second to avoid existence of SeekPosition equal to 0. The 0 cannot be used as a free slot FLAG without the TAG.

/*
The opentype argument is a string that controls how the file is opened and specifies attributes of the resulting stream. It must begin with one of the following sequences of characters:

r
    Open an existing file for reading only.
w
    Open the file for writing only. If the file already exists, it is truncated to zero length. Otherwise a new file is created.
a
    Open a file for append access; that is, writing at the end of file only. If the file already exists, its initial contents are unchanged and output to the stream is appended to the end of the file. Otherwise, a new, empty file is created.
r+
    Open an existing file for both reading and writing. The initial contents of the file are unchanged and the initial file position is at the beginning of the file.
w+
    Open a file for both reading and writing. If the file already exists, it is truncated to zero length. Otherwise, a new file is created.
a+
    Open or create file for both reading and appending. If the file exists, its initial contents are unchanged. Otherwise, a new file is created. The initial file position for reading is at the beginning of the file, but output is always appended to the end of the file. 
*/

		// r16: Three+One conditions to reuse: Leprechaun_64bit.swp to exist, Not in multi-pass mode, W/w specified. The last one is the HASH upload to have been successful!
		if ( REUSE == 2 ) { //  REUSE [
	if( ( fp_outRG = fopen( "Leprechaun_64bit.swp", "rb+" ) ) == NULL )
	{ printf( "Leprechaun: Can't create file 'Leprechaun_64bit.swp'.\n" ); return( 1 ); }

#if defined(_WIN32_ENVIRONMENT_)
   // 64bit:
_lseeki64( fileno(fp_outRG), 0L, SEEK_END );
size_inLINESIXFOUR = _telli64( fileno(fp_outRG) );
_lseeki64( fileno(fp_outRG), 0L, SEEK_SET );
#else
   // 64bit:
fseeko( fp_outRG, 0L, SEEK_END );
size_inLINESIXFOUR = ftello( fp_outRG );
fseeko( fp_outRG, 0L, SEEK_SET );
#endif /* defined(_WIN32_ENVIRONMENT_)  */
printf( "Reusing 'Leprechaun_64bit.swp' file: %s bytes\n", _ui64toaKAZEcomma(size_inLINESIXFOUR, llTOaDigits, 10) );

	fsetpos(fp_outRG, &BufEnd_64); // SOMETHING ROTTEN with lseeki64/fseeko and fsetpos ??!! So DO-IT-OVER.
		}
		else { // REUSE
	if( ( fp_outRG = fopen( "Leprechaun_64bit.swp", "wb+" ) ) == NULL )
	{ printf( "Leprechaun: Can't create file 'Leprechaun_64bit.swp'.\n" ); return( 1 ); }
	printf( "Allocating/ZEROing %s bytes swap file ... ", _ui64toaKAZEcomma(size_in64_L14, llTOaDigits, 10) );
	fsetpos(fp_outRG, &fsetpos_ZERO); // SOMETHING ROTTEN with lseeki64/fseeko and fsetpos ??!! So DO-IT-OVER.
	memset(OneCkusterZEROES,0,1024*4);
	for (ThunderwithL64_L14=0; ThunderwithL64_L14 < size_in64_L14/(1024*4); ThunderwithL64_L14++)
        	fwrite(OneCkusterZEROES, 1024*4, 1, fp_outRG);
	for (ThunderwithL64_L14=0; ThunderwithL64_L14 < size_in64_L14%(1024*4); ThunderwithL64_L14++)
        	fwrite(&OneChar_ieByte, 1, 1, fp_outRG);
	fsetpos(fp_outRG, &fsetpos_ZERO); // SOMETHING ROTTEN with lseeki64/fseeko and fsetpos ??!! So DO-IT-OVER.
        	fwrite(FileSwapTag, 13, 1, fp_outRG);
        	fwrite(&EOFcode, 1, 1, fp_outRG);
	fsetpos(fp_outRG, &BufEnd_64); // SOMETHING ROTTEN with lseeki64/fseeko and fsetpos ??!! So DO-IT-OVER.
	printf( "OK\n");
		} // REUSE ]
				} else { // ########## 64bit memory manipulations [
	// INTERNAL memory variant: the pool lives in RAM instead of the swap file.
	// Size = Thunderwith KB + 14 + 1 + 64 bytes; the extra 64 bytes leave room for the alignment step below.
	size_in64_L14 = 1024 * (unsigned long long)Thunderwith + 14 + 1 + 64;
// The MB figure is converted to unsigned long so that it really matches %lu (the 64bit expression does not where long is 32bit).
printf( "Allocating memory %luMB ... ", (unsigned long)((size_in64_L14>>20)+1) );
pointerflushUNALIGN_64 = (char *)malloc( size_in64_L14 );
if( pointerflushUNALIGN_64 == NULL )
{ puts( "\nLeprechaun: Needed memory allocation denied!\n" ); return( 1 ); }
// Advance to the next 64-byte boundary (a full 64 bytes when the block is already aligned).
pointerflush_64 = pointerflushUNALIGN_64 + 64 - (((size_t)pointerflushUNALIGN_64) % 64); // 13_6+
//offset=64-int((long)data&63);
	//memset(pointerflush_64,0,1024 * (unsigned long long)Thunderwith + 14);
// In this variant BufEnd_64 holds a memory address (not a file position as in the EXTERNAL variant).
BufEnd_64 = (unsigned long long)pointerflush_64;
/*
printf( "BufEnd_64: %s\n", _ui64toaKAZEcomma(BufEnd_64, llTOaDigits, 10) );
printf( "pointerflush_64: %s\n", _ui64toaKAZEcomma(pointerflush_64, llTOaDigits, 10) );
pointerflush_64 = (char *)BufEnd_64;
printf( "pointerflush_64: %s\n", _ui64toaKAZEcomma(pointerflush_64, llTOaDigits, 10) );
exit (1);
	//BufEnd_64: 541,261,888
	//pointerflush_64: 541,261,888
	//pointerflush_64: 541,261,888
*/
	printf( "OK\n");
				} // ########## 64bit memory manipulations ]
	fprintf( fp_outLOG, "Leprechaun report:\n" );
} //if (BSTorBtree != 2) {


// PROGRAM PROGRAM PROGRAM PROGRAM PROGRAM PROGRAM PROGRAM PROGRAM PROGRAM
// Take the start timestamp (t1 is the reference for the phrases-per-second figure of the progress line)
// and reset all statistics before the list file is scanned.
(void) time(&t1);
        Melnitchka = 0; // index of the current progress-spinner frame
        WORDcount = 0; // Total word count i.e. for all files!
        WORDcountDistinct = 0;
        NumberOfFiles = 0; // incremented once per opened input file
        NumberOfLines = 0; // incremented for each LF (byte 10) met in the input files
        FilesLEN = 0; // running sum of the sizes of the input files
        LINE10len = 0; // length of the list-file line being collected
// Added in r.14+++++FIXFIX [
	NumberOfTrees=0; NumberOfHashCollisions=0;
	NumberOfLEAFs=0;
	WORDcountAttemptsToPut=0;
	LevelsInCorona_Not_Counting_ROOT=0;
// Added in r.14+++++FIXFIX ]

        for( k = 0; k < size_in; k++ )
	{
                fread( &workbyte, 1, 1, fp_in );
                if( workbyte != 10 )
                { if( workbyte != 13 ) // NON UNIX
                  { if( LINE10len < 255 ) { LINE10[ LINE10len ] = workbyte; }
                    LINE10len++;
                  }
                  else
                  {
                  }
		}
		else
                { if( 1 <= LINE10len && LINE10len <= 255 )
                  { LINE10[ LINE10len ] = 0;
METACOMMANDFlag = 0;
if ( strcmp(LINE10, "Leprechaun says x-gram inserting disabled for next files: ON\0") == 0 ) {DoNotInsertFlag = 1; METACOMMANDFlag = 1;}
if ( strcmp(LINE10, "Leprechaun says x-gram inserting disabled for next files: OFF\0") == 0 ) {DoNotInsertFlag = 0; METACOMMANDFlag = 1;}

if( METACOMMANDFlag == 0 )
{ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IT IS a FILENAME not a METACOMMAND [
if( ( fp_inLINE = fopen( LINE10, "rb" ) ) == NULL ) 	// Since r15FIXFIX+ a command [METACOMMAND] inside the .LST file is allowed: 'Leprechaun says x-gram inserting disabled for next files: ON'
							// To allow again (which is default) use: 'Leprechaun says x-gram inserting disabled for next files: OFF'
{ printf( "Leprechaun: Can't open file %s \n", LINE10 ); return( 1 ); }

//fseek( fp_inLINE, 0L, SEEK_END );  //Rev. 12
//size_inLINE = ftell( fp_inLINE );  //Rev. 12
//fseek( fp_inLINE, 0L, SEEK_SET );  //Rev. 12

#if defined(_WIN32_ENVIRONMENT_)
   // 64bit:
_lseeki64( fileno(fp_inLINE), 0L, SEEK_END );
size_inLINESIXFOUR = _telli64( fileno(fp_inLINE) );
_lseeki64( fileno(fp_inLINE), 0L, SEEK_SET );
#else
   // 64bit:
fseeko( fp_inLINE, 0L, SEEK_END );
size_inLINESIXFOUR = ftello( fp_inLINE );
fseeko( fp_inLINE, 0L, SEEK_SET );
#endif /* defined(_WIN32_ENVIRONMENT_)  */

printf( "Size of Input TEXTual file: %s\n", _ui64toaKAZEcomma(size_inLINESIXFOUR, llTOaDigits, 10) );
FilesLEN = FilesLEN + size_inLINESIXFOUR;
NumberOfFiles++;

        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	wrdlen = 0;
        for( i = 0; i < size_inLINESIXFOUR; i++ )
	{

                // ~~~~~~~~~~~~ Buffering fread [
                // Deliver the next input byte in 'workbyte'.
                // workKoffset == -1 means that no buffered data is pending; otherwise it is the index of the
                // byte delivered last from the 128KB chunk held in workK.
                // NOTE(review): none of the fread() results is checked, a short read goes unnoticed.
                if (workKoffset == -1) {
                        // A whole chunk is fetched only while MORE than 128KB lie ahead of position i ...
                        if (i + 1024*128 < size_inLINESIXFOUR) {
                                fread( &workK[0], 1, 1024*128, fp_inLINE );
                                workKoffset = 0;
                                workbyte = workK[workKoffset];
                        } else 
                        // ... the tail of the file (128KB or less) is read one byte at a time.
                        fread( &workbyte, 1, 1, fp_inLINE );
                } else {
                        workKoffset++;
                        workbyte = workK[workKoffset];
                        // The last byte of the chunk has just been delivered: force a refill decision on the next call.
                        if (workKoffset == 1024*128 - 1) workKoffset = -1;
                }
                // ~~~~~~~~~~~~ Buffering fread ]

//                if( isalpha( workbyte ) )
//                {
//                        if( wrdlen < 31 )
//                        { wrd[ wrdlen ] = tolower( workbyte ); }
//                        wrdlen++;
//                }

                if ( workbyte < 'A' ) // Most characters are under alphabet - only one if
                {
ElStupido:
                        // This fragment is MIRRORed: #1 copy [
                        if (workbyte == 10) {NumberOfLines++;}

// Quadruple! [
// Sliding window for 'wrd': The incoming string 'a lot of things must' becomes 'a_lot_of_things' and 'lot_of_things_must':

// ain_t_that_a
// didn_t_feel_a
// i_didn_t_feel
// t_feel_a_thing
// t_that_a_cake

// 316
// 00:17:55,859 --> 00:17:58,447
// Ain't that a cake ? I didn't feel a thing !

                           if ( PLE_words_INITflag == 0 && ( (PLE_words != 0) || (PLE_words == 0 && wrdlen != 0) ) )
                           if ( workbyte == '.' || workbyte == '!' || workbyte == '?' || workbyte == ':' || workbyte == ';' || workbyte == ',' || workbyte == '\t' ) {
                              PLE_words_INITflag = 1;
                           } 
// Quadruple! ]

//r.15fixfix [
                        if( wrdlen > 31 ) PLE_words_INITflag = 1;
//r.15fixfix ]

                        //if ( 1 <= wrdlen && wrdlen <= LongestLineInclusive ) // Enforce no word with length greater than 31 with below line to enter x-lets.
                        if ( 1 <= wrdlen && wrdlen <= 31 )
			{
                                wrd[ wrdlen ] = 0;
                                // OTKACHAM: 1<<17-1 gives 65536 i.e. '-' have had high priority than '<<'
                                //Next line gives error due to mix of '&' and 'double' 
                                // Progress line: printed whenever WORDcount is a multiple of 2^18 (262,144).
                                // It shows a spinner frame, the phrases-per-second rate since t1, the total and
                                // distinct counts, and how many 64ths of the current file have been consumed.
                                if ((WORDcount & ((1<<18)-1)) == 0)
                                {
Melnitchka = Melnitchka & 3; // 0 1 2 3: 00 01 10 11
				(void) time(&t4);
				// Pretend that at least one second has elapsed, otherwise the rate would divide by zero.
				if (t4 <= t1) {t4 = t1; t4++;}
// The 'Done' figure (0..64) is a 64bit expression; it is converted to unsigned long so that it really matches %lu.
printf( "%s; %sP/s; Phrase count: %s of them %s distinct; Done: %lu/64\r", Auberge[Melnitchka++], _ui64toaKAZEzerocomma(WORDcount/((int) t4-t1), llTOaDigits3, 10)+(26-10), _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), (unsigned long)(((long long)i<<6) / size_inLINESIXFOUR) );
                                }

//14+++ [
PLE_words++;

#ifdef singleton
                              PLE_words_INITflag = 1;
#endif
#ifdef doubleton
// Sliding window of 2 words. The first word is only remembered; from the second word on,
// wrd1st/wrd2nd hold the window and 'wrd' receives "first_second".
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1) {
       strcpy( wrd1st, wrd );
} else {
       if (PLE_words != 2) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 2;
              strcpy( wrd1st, wrd2nd );
       }
       strcpy( wrd2nd, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd) + 1; // one '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
       }
}
#endif
#ifdef tripleton
// Sliding window of 3 words. Words 1..2 are only remembered; from the third word on,
// wrd1st..wrd3rd hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else {
       if (PLE_words != 3) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 3;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
       }
       strcpy( wrd3rd, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd) + 2; // two '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
       }
}
#endif
#ifdef quadrupleton
// Quadruple! [
// Sliding window of 4 words. Words 1..3 are only remembered; from the fourth word on,
// wrd1st..wrd4th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else {
       if (PLE_words != 4) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 4;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
       }
       strcpy( wrd4th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th) + 3; // three '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
       }
}
// Quadruple! ]
#endif
#ifdef quintupleton
// Sliding window of 5 words. Words 1..4 are only remembered; from the fifth word on,
// wrd1st..wrd5th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else {
       if (PLE_words != 5) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 5;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
              strcpy( wrd4th, wrd5th );
       }
       strcpy( wrd5th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th) + 4; // four '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd5th);
       }
}
#endif
#ifdef sextupleton
// Sliding window of 6 words. Words 1..5 are only remembered; from the sixth word on,
// wrd1st..wrd6th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else if (PLE_words == 5)
       strcpy( wrd5th, wrd );
else {
       if (PLE_words != 6) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 6;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
              strcpy( wrd4th, wrd5th );
              strcpy( wrd5th, wrd6th );
       }
       strcpy( wrd6th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th) + 5; // five '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd5th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd6th);
       }
}
#endif
#ifdef septupleton
// Sliding window of 7 words. Words 1..6 are only remembered; from the seventh word on,
// wrd1st..wrd7th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else if (PLE_words == 5)
       strcpy( wrd5th, wrd );
else if (PLE_words == 6)
       strcpy( wrd6th, wrd );
else {
       if (PLE_words != 7) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 7;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
              strcpy( wrd4th, wrd5th );
              strcpy( wrd5th, wrd6th );
              strcpy( wrd6th, wrd7th );
       }
       strcpy( wrd7th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th)+strlen(wrd7th) + 6; // six '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd5th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd6th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd7th);
       }
}
#endif
#ifdef octupleton
// Sliding window of 8 words. Words 1..7 are only remembered; from the eighth word on,
// wrd1st..wrd8th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else if (PLE_words == 5)
       strcpy( wrd5th, wrd );
else if (PLE_words == 6)
       strcpy( wrd6th, wrd );
else if (PLE_words == 7)
       strcpy( wrd7th, wrd );
else {
       if (PLE_words != 8) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 8;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
              strcpy( wrd4th, wrd5th );
              strcpy( wrd5th, wrd6th );
              strcpy( wrd6th, wrd7th );
              strcpy( wrd7th, wrd8th );
       }
       strcpy( wrd8th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th)+strlen(wrd7th)+strlen(wrd8th) + 7; // seven '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd5th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd6th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd7th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd8th);
       }
}
#endif
#ifdef nonupleton
// Sliding window of 9 words. Words 1..8 are only remembered; from the ninth word on,
// wrd1st..wrd9th hold the window and 'wrd' receives the words joined with '_'.
// When the joined length exceeds LongestLineInclusive, 'wrd' keeps the single word while
// wrdlen still carries the oversized total.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else if (PLE_words == 5)
       strcpy( wrd5th, wrd );
else if (PLE_words == 6)
       strcpy( wrd6th, wrd );
else if (PLE_words == 7)
       strcpy( wrd7th, wrd );
else if (PLE_words == 8)
       strcpy( wrd8th, wrd );
else {
       if (PLE_words != 9) { // counter ran past the window size: clamp it and slide the window by one word
              PLE_words = 9;
              strcpy( wrd1st, wrd2nd );
              strcpy( wrd2nd, wrd3rd );
              strcpy( wrd3rd, wrd4th );
              strcpy( wrd4th, wrd5th );
              strcpy( wrd5th, wrd6th );
              strcpy( wrd6th, wrd7th );
              strcpy( wrd7th, wrd8th );
              strcpy( wrd8th, wrd9th );
       }
       strcpy( wrd9th, wrd );
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th)+strlen(wrd7th)+strlen(wrd8th)+strlen(wrd9th) + 8; // eight '_'
       if ( wrdlen <= LongestLineInclusive ) {
              strcpy(wrd, wrd1st);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd2nd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd3rd);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd4th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd5th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd6th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd7th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd8th);
              strcat(wrd, DelimiterUnderscore); strcat(wrd, wrd9th);
       }
}
#endif
#ifdef decupleton
// decupleton build: keep a sliding window of the ten most recent words in
// wrd1st..wrd10th and, once ten words are available, join them with
// DelimiterUnderscore into wrd so the 10-gram is processed as a single key.
// PLE_words selects the slot that receives the current word; it is updated
// outside this section (presumably a per-sequence word counter - TODO confirm).
// NOTE(review): strcpy/strcat are unbounded; this relies on wrd and the
// wrdNth buffers being large enough - confirm against their declarations.
if (PLE_words == 1)
       strcpy( wrd1st, wrd );
else if (PLE_words == 2)
       strcpy( wrd2nd, wrd );
else if (PLE_words == 3)
       strcpy( wrd3rd, wrd );
else if (PLE_words == 4)
       strcpy( wrd4th, wrd );
else if (PLE_words == 5)
       strcpy( wrd5th, wrd );
else if (PLE_words == 6)
       strcpy( wrd6th, wrd );
else if (PLE_words == 7)
       strcpy( wrd7th, wrd );
else if (PLE_words == 8)
       strcpy( wrd8th, wrd );
else if (PLE_words == 9)
       strcpy( wrd9th, wrd );
else if (PLE_words == 10) {
       // The window has just become full: store the tenth word and try to form the first 10-gram.
       strcpy( wrd10th, wrd );
       // Joined key length = the ten word lengths + nine delimiters
       // (assumes DelimiterUnderscore is a one-character string - TODO confirm).
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th)+strlen(wrd7th)+strlen(wrd8th)+strlen(wrd9th)+strlen(wrd10th)+1+1+1+1+1+1+1+1+1; // '_''_''_''_''_''_''_''_''_'
       //wrdlen = strlen(wrd);
       //if ( wrdlen <= 31 ) {
       // Rebuild wrd as "w1_w2_..._w10" only when the joined key fits the limit.
       // When it does not fit, wrd still holds the last single word while wrdlen
       // already holds the joined length.
       if ( wrdlen <= LongestLineInclusive ) {
	  strcpy(wrd, wrd1st);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd2nd);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd3rd);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd4th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd5th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd6th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd7th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd8th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd9th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd10th);
       }
}
else {
       // PLE_words is none of 1..10 here (presumably 11, i.e. one more word arrived
       // after a full window - TODO confirm): clamp the counter back to 10.
       PLE_words = 10;
       // Slide the window: drop the oldest word by moving every slot down by one.
       // The copy order (1st <- 2nd first, 9th <- 10th last) ensures each slot is read
       // before it is overwritten; then the new word becomes wrd10th.
       strcpy( wrd1st, wrd2nd );
       strcpy( wrd2nd, wrd3rd );
       strcpy( wrd3rd, wrd4th );
       strcpy( wrd4th, wrd5th );
       strcpy( wrd5th, wrd6th );
       strcpy( wrd6th, wrd7th );
       strcpy( wrd7th, wrd8th );
       strcpy( wrd8th, wrd9th );
       strcpy( wrd9th, wrd10th );
       strcpy( wrd10th, wrd );
       // Same length computation and conditional join as in the PLE_words == 10 branch.
       wrdlen = strlen(wrd1st)+strlen(wrd2nd)+strlen(wrd3rd)+strlen(wrd4th)+strlen(wrd5th)+strlen(wrd6th)+strlen(wrd7th)+strlen(wrd8th)+strlen(wrd9th)+strlen(wrd10th)+1+1+1+1+1+1+1+1+1; // '_''_''_''_''_''_''_''_''_'
       //wrdlen = strlen(wrd);
       //if ( wrdlen <= 31 ) {
       if ( wrdlen <= LongestLineInclusive ) {
	  strcpy(wrd, wrd1st);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd2nd);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd3rd);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd4th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd5th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd6th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd7th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd8th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd9th);
	  strcat(wrd, DelimiterUnderscore);
	  strcat(wrd, wrd10th);
       }
}
#endif
//14+++ ]

//14+++ [
#ifdef singleton
                        if ( ( PLE_words == 1 ) && ( 1 <= wrdlen ) && ( wrdlen <= 31 ) ) {
#endif
#ifdef doubleton
                        if ( ( PLE_words == 2 ) && ( 5 <= wrdlen ) && ( wrdlen <= 41 ) ) {
#endif
#ifdef tripleton
                        if ( ( PLE_words == 3 ) && ( 9 <= wrdlen ) && ( wrdlen <= 41 ) ) {
#endif
#ifdef quadrupleton
                        if ( ( PLE_words == 4 ) && ( 13 <= wrdlen ) && ( wrdlen <= 51 ) ) {
#endif
#ifdef quintupleton
                        if ( ( PLE_words == 5 ) && ( 17 <= wrdlen ) && ( wrdlen <= 61 ) ) {
#endif
#ifdef sextupleton
                        if ( ( PLE_words == 6 ) && ( 21 <= wrdlen ) && ( wrdlen <= 71 ) ) {
#endif
#ifdef septupleton
                        if ( ( PLE_words == 7 ) && ( 25 <= wrdlen ) && ( wrdlen <= 81 ) ) {
#endif
#ifdef octupleton
                        if ( ( PLE_words == 8 ) && ( 29 <= wrdlen ) && ( wrdlen <= 91 ) ) {
#endif
#ifdef nonupleton
                        if ( ( PLE_words == 9 ) && ( 33 <= wrdlen ) && ( wrdlen <= 101 ) ) {
#endif
#ifdef decupleton
                        if ( ( PLE_words == 10 ) && ( 37 <= wrdlen ) && ( wrdlen <= 111 ) ) {
#endif
//14+++ ]
WORDcount++;
if (BSTorBtree < 2) {

  LetterOffset = (int)( wrd[0] - 'a' ) * 31 + (wrdlen-1); // 0..805
  //BufStart = pointerflush + LetterOffset * LetterBuffer; // OLD

  BufStart = pointerflush + (int)( wrd[0] - 'a' ) * WHOLEletter_BufferSize + OffsetsInBuffer[wrdlen-1];
  // Above line and Below line are equal
  //BufStart = pointerflush + (LetterOffset / 31) * WHOLEletter_BufferSize + OffsetsInBuffer[LetterOffset % 31];

              //Slot = KuxHash3plus(wrd)<<2; //13++
              //Slot = FNV1A_Hash_SHIFTless_XORless(wrd)<<2; //13+++
              //Slot = FNV1A_Hash_4_OCTETS(wrd, wrdlen>>2)<<2; //13++++
              //Slot = FNV1A_Hash_4_OCTETS_31(wrd, wrdlen>>2)<<2; //13+++++
/*
if (wrdlen<=19) // 4x4+3=19
              Slot = FNV1A_Hash_4_OCTETS(wrd, wrdlen>>2)<<2; //13++++
else            // 2x8+4=20 i.e. first contains 5 clashes
              Slot = FNV1A_Hash_8_OCTETS(wrd, wrdlen>>3)<<2; //13+++++
*/
//if (wrdlen<=19) // 4x4+3=19 i.e. last contains 7 clashes
//              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>2, 2)<<2; //13+++++
//else            // 2x8+4=20 i.e. first contains 6 clashes
//              Slot = FNV1A_Hash_Granularity(wrd, wrdlen>>3, 3)<<2; //13+++++

              //Slot = FNV1A_Hash_8_OCTETS(wrd, wrdlen>>3)<<2; //13_7p
              //Slot = HashFNV1A_unrolled_Final(wrd, wrdlen)<<2; //13_7p
	      //Slot = HashAlfalfa_HALF(wrd, wrdlen)<<2; //13_7p
	      //Slot = Hash_Alfalfa(wrd, wrdlen)<<2; //13_7p
//	      Slot = Sixtinsensitive(wrd, wrdlen)<<2; //13_7p
	      Slot = FNV1A_Hash_Jesteress(wrd, wrdlen)<<2; //13_7p
//	      Slot = FNV1A_Hash_Jester(wrd, wrdlen)<<2; //13_7p

/*
	hashAlfalfa = 7;
	for(iAlfalfa = 0; iAlfalfa < (wrdlen & -2); iAlfalfa += 2) {
		hashAlfalfa = (17+9) * ((17+9) * hashAlfalfa + (wrd[iAlfalfa])) + (wrd[iAlfalfa+1]);
	}
	if(wrdlen & 1)
		hashAlfalfa = (17+9) * hashAlfalfa + (wrd[wrdlen-1]);

              Slot = (( hashAlfalfa ^ (hashAlfalfa >> 16) ) & 8191)<<2; //13_7p
*/

                    memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
//; Line 917
//  mov     edx, DWORD PTR [eax+ebp]
//  add     esp, 4
                    // ?! DANGEROUS: the lines above and below are identical only if 'long' is 4 bytes
                    //PseudoLinkedPointer = (unsigned long)*(long *)(BufStart+Slot);
//; Line 919
//  mov     edx, DWORD PTR [eax+ebp]
//  add     esp, 4
                    //        while (count--) {
                    //        *(char *)dst = *(char *)src;
                    //        dst = (char *)dst + 1;
                    //        src = (char *)src + 1;
                    //        }

if (BSTorBtree == 0)
{
// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ BST fragment [
                    if (PseudoLinkedPointer == 0) // means EMPTY-SLOT
                    {
            //if( (unsigned long)(bufend[LetterOffset] - BufStart) + wrdlen + 4 < (GRMBLhill[(int)wrdlen] * LetterBuffer)/31 ) // OLD slower
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + wrdlen + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL
                      {
                      memcpy( BufStart+Slot, &bufend[LetterOffset], 4 );
//; Line 932
//  mov     DWORD PTR [eax+ebp], esi
                      // ?! DANGEROUS: the lines above and below are identical only if 'long' is 4 bytes
                      //*(long *)(BufStart+Slot) = *(long *)&bufend[LetterOffset];
//; Line 936
//  mov     DWORD PTR [eax+ebp], esi

                      // The 3 lines below are commented out because an experiment (below the malloc) shows that the allocated memory is ZEROed.
                      //memcpy( bufend[LetterOffset], &BufStart[NumberOfSLOTs*4], 4 ); // means next exists not: Means PseudoLinkedPointerL = 0
//; Line 940
//  mov     ecx, DWORD PTR [ebp+32768]
                      // ?! DANGEROUS: the lines above and below are identical only if 'long' is 4 bytes
                      //*(long *)bufend[LetterOffset] = *(long *)&BufStart[NumberOfSLOTs*4];
//; Line 944
//  mov     ecx, DWORD PTR [ebp+32768]
                      //bufend[LetterOffset] = bufend[LetterOffset] + 4;
                      //memcpy( bufend[LetterOffset], &BufStart[NumberOfSLOTs*4], 4 ); // means next exists not: Means PseudoLinkedPointerR = 0
                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4; // + 4 due to above commenting
                      memcpy( bufend[LetterOffset], wrd, wrdlen ); WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
                                //bufNoWpS[LetterOffset][Slot]++; // ?! crashes
                      bufend[LetterOffset] = bufend[LetterOffset] + wrdlen;
                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into Binary-Search-Trees: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
                    }
                    else // means USED-SLOT
                    { FoundInLinkedList = 0;
                      while (PseudoLinkedPointer != 0 && FoundInLinkedList == 0)
                      { 
                        if (memcmp(PseudoLinkedPointer+4+4,wrd,wrdlen) == 0)
                    //        while ( --count && *(char *)buf1 == *(char *)buf2 ) {
                    //        buf1 = (char *)buf1 + 1;
                    //        buf2 = (char *)buf2 + 1;
                    //        }
                    //        return( *((unsigned char *)buf1) - *((unsigned char *)buf2) );
                        { FoundInLinkedList = 1;
                        }
                        else // i.e < or >
                        {
                          if (memcmp(PseudoLinkedPointer+4+4,wrd,wrdlen) > 0)
                          memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer, 4 );
                          else
                          {
                          PseudoLinkedPointer = PseudoLinkedPointer + 4;
                          memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer, 4 );
                          }

                          if (PseudoLinkedPointerNEW == 0)
                    {
            //if( (unsigned long)(bufend[LetterOffset] - BufStart) + wrdlen + 4 < (GRMBLhill[(int)wrdlen] * LetterBuffer)/31 ) // OLD slower
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + wrdlen + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] )
                          { memcpy( PseudoLinkedPointer, &bufend[LetterOffset], 4 );
                            // The 3 lines below are commented out because an experiment (below the malloc) shows that the allocated memory is ZEROed.
                            //memcpy( bufend[LetterOffset], &BufStart[NumberOfSLOTs*4], 4 ); // means next exists not
                            //bufend[LetterOffset] = bufend[LetterOffset] + 4;
                            //memcpy( bufend[LetterOffset], &BufStart[NumberOfSLOTs*4], 4 ); // means next exists not
                            bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4; // + 4 due to above commenting
                            memcpy( bufend[LetterOffset], wrd, wrdlen ); WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
                                //bufNoWpS[LetterOffset][Slot]++; // ?! crashes
                             bufend[LetterOffset] = bufend[LetterOffset] + wrdlen;
                            if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                          }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into Binary-Search-Trees: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
                    }
                          PseudoLinkedPointer = PseudoLinkedPointerNEW;
                        }
                        WORDcountAttemptsToPut++;
                      } // while
                    }
// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ BST fragment ]
} else
{
// ########################################## B-tree order 3 fragment [
//
// LEAF structure: [LeftPointer][MiddlePointer][RightPointer][LeftWord][RightWord]
//                  4bytes       4bytes         4bytes        wrdlen    wrdlen
//                                                            *         *          <- if *(char *)==0 means the word cell is empty
// The whole B-tree order 3 fragment consists of 3 sub-fragments:
// 1] Search 2] if Search failed, a tracing ('Trasirascht') Search that pushes each visited LEAF (PseudoLinkedPointer) onto the stack 3] Iterative Insert

// 1] Search [ ________1407 line in C - see below: whole Search in assembler________
                    if (PseudoLinkedPointer == 0) // means EMPTY-SLOT
                    {
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
                      {
                      memcpy( BufStart+Slot, &bufend[LetterOffset], 4 );
                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
                      memcpy( bufend[LetterOffset], wrd, wrdlen ); WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
                      FoundInLinkedList = 1;
                    }
                    else // means USED-SLOT
                    { FoundInLinkedList = 0;
                      while (PseudoLinkedPointer != 0 && FoundInLinkedList == 0)
                      { 
// ***** 'P W P' section [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS LW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) > 0) // go LP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 0, 4 ); //LP
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) < 0) // go RP or MP
                           { // RW existence check - line below:
                             if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // RW exists
                                { // Here all 'P W P' section is repeated; the way of handling case when dynamic number of words in leaf
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ***** 'P W P' section 2 [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS RW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) > 0) // go MP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) < 0) // go RP
                           { // No ?W after RW - go RP
                             memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4 + 4, 4 ); //RP
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else FoundInLinkedList = 1; // wrd is RW
                        WORDcountAttemptsToPut++;
// ***** 'P W P' section 2 ]
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
                                }
                             else // RW empty - go MP
                                { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                                  PseudoLinkedPointer = PseudoLinkedPointerNEW;
                                }
                           }
                        else FoundInLinkedList = 1; // wrd is LW
                        WORDcountAttemptsToPut++;
// ***** 'P W P' section ]
                      } // while
                        WORDcountAttemptsToPut--; // - 1 due to BST way of counting i.e. direct hash hit is not counted only successors
                    }
// 1] Search ] ________1484 line in C - see below: whole Search in assembler________

/*
; Line 1397
	jmp	$L2139
$L2042:
; Line 1408
	test	edx, edx
	jne	SHORT $L2110
; Line 1410
	mov	ecx, DWORD PTR _bufend$[esp+esi*4+892340]
	mov	edi, DWORD PTR _GRMBLFoolAgain$[esp+ebx*4+892340]
	lea	edx, DWORD PTR _bufend$[esp+esi*4+892340]
	lea	esi, DWORD PTR [ebx+ebx+12]
	sub	esi, ebp
	add	esi, ecx
	cmp	esi, edi
	mov	DWORD PTR tv4122[esp+892340], edx
	jae	$L2113
; Line 1412
	mov	DWORD PTR [eax+ebp], ecx
; Line 1413
	lea	eax, DWORD PTR [ecx+12]
; Line 1436
	jmp	$L2749
$L2110:
; Line 1437
	mov	DWORD PTR _FoundInLinkedList$[esp+892340], 0
	npad	11
$L2141:
; Line 1438
	mov	eax, DWORD PTR _FoundInLinkedList$[esp+892340]
	test	eax, eax
	jne	$L2142
; Line 1445
	lea	ebp, DWORD PTR [edx+12]
	mov	ecx, ebx
	lea	edi, DWORD PTR _wrd$[esp+892340]
	mov	esi, ebp
	xor	eax, eax
	repe cmpsb
	je	SHORT $L2682
	sbb	eax, eax
	sbb	eax, -1
$L2682:
	test	eax, eax
	jle	SHORT $L2143
; Line 1447
	mov	edx, DWORD PTR [edx]
; Line 1449
	jmp	$L2153
$L2143:
	mov	ecx, ebx
	lea	edi, DWORD PTR _wrd$[esp+892340]
	mov	esi, ebp
	xor	eax, eax
	repe cmpsb
	je	SHORT $L2640
	sbb	eax, eax
	sbb	eax, -1
$L2640:
	test	eax, eax
	jge	SHORT $L2145
; Line 1451
	mov	cl, BYTE PTR [edx+ebx+12]
	test	cl, cl
	lea	eax, DWORD PTR [edx+ebx+12]
	je	SHORT $L2147
; Line 1459
	mov	ecx, ebx
	lea	edi, DWORD PTR _wrd$[esp+892340]
	mov	esi, eax
	xor	ebp, ebp
	repe cmpsb
	je	SHORT $L2695
	sbb	ebp, ebp
	sbb	ebp, -1
$L2695:
	test	ebp, ebp
	jle	SHORT $L2148
; Line 1461
	mov	edx, DWORD PTR [edx+4]
; Line 1463
	jmp	SHORT $L2151
$L2148:
	mov	esi, eax
	mov	ecx, ebx
	lea	edi, DWORD PTR _wrd$[esp+892340]
	xor	eax, eax
	repe cmpsb
	je	SHORT $L2642
	sbb	eax, eax
	sbb	eax, -1
$L2642:
	test	eax, eax
	jge	SHORT $L2150
; Line 1466
	mov	edx, DWORD PTR [edx+8]
; Line 1468
	jmp	SHORT $L2151
$L2150:
	mov	DWORD PTR _FoundInLinkedList$[esp+892340], 1
$L2151:
; Line 1469
	mov	ecx, DWORD PTR _WORDcountAttemptsToPut$[esp+892340]
	mov	eax, DWORD PTR _WORDcountAttemptsToPut$[esp+892344]
	add	ecx, 1
	adc	eax, 0
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892340], ecx
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892344], eax
; Line 1473
	jmp	SHORT $L2153
$L2147:
; Line 1475
	mov	edx, DWORD PTR [edx+4]
; Line 1478
	jmp	SHORT $L2153
$L2145:
	mov	DWORD PTR _FoundInLinkedList$[esp+892340], 1
$L2153:
; Line 1479
	mov	esi, DWORD PTR _WORDcountAttemptsToPut$[esp+892340]
	mov	ecx, DWORD PTR _WORDcountAttemptsToPut$[esp+892344]
	add	esi, 1
	adc	ecx, 0
	test	edx, edx
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892340], esi
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892344], ecx
	jne	$L2141
$L2142:
; Line 1482
	mov	edx, DWORD PTR _WORDcountAttemptsToPut$[esp+892340]
	mov	ecx, DWORD PTR _WORDcountAttemptsToPut$[esp+892344]
	or	eax, -1
	add	edx, eax
	adc	ecx, eax
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892344], ecx
	mov	DWORD PTR _WORDcountAttemptsToPut$[esp+892340], edx
$L2139:
*/

if (FoundInLinkedList == 0)
{
// 2] if Search failed Trasirascht(pushing in stack PseudoLinkedPointer(visited LEAFs)) Search [
//    'TracingSearch' is the same as 'Search' except that it records the trail in my simulated stack;
//    the goal is to keep 'Search' from wasting time on a trail that is not needed when no 'Insert' follows.
//    The simulated stack contains pairs of 'Address of ParentLEAF' + 'Offset of ParentPointer in ParentLEAF, i.e. 0 for LP, 4 for MP, 8 for RP'.
//    The 'Offset ...' entry saves unnecessary comparisons of the NEW word which goes up after splitting.
                    memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
                StackPtr = 0;
                      while (PseudoLinkedPointer != 0)
                      { 
// ***** 'P W P' section [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS LW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) > 0) // go LP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 0, 4 ); //LP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 0; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) < 0) // go RP or MP
                           { // RW existence check - line below:
                             if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // RW exists
                                { // Here all 'P W P' section is repeated; the way of handling case when dynamic number of words in leaf
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ***** 'P W P' section 2 [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS RW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) > 0) // go MP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 4; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) < 0) // go RP
                           { // No ?W after RW - go RP
                             memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4 + 4, 4 ); //RP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 8; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else FoundInLinkedList = 1; // wrd is RW
// ***** 'P W P' section 2 ]
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
                                }
                             else // RW empty - go MP
                                { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 4; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                                  PseudoLinkedPointer = PseudoLinkedPointerNEW;
                                }
                           }
                        else FoundInLinkedList = 1; // wrd is LW
// ***** 'P W P' section ]
                      } // while
// 2] if Search failed Trasirascht(pushing in stack PseudoLinkedPointer(visited LEAFs)) Search ]

// 3] Insert Iterative [
//    There are total 4 situations:
//    Case #1: Outer NODE(including ROOT) [  ][  ][  ][LW][  ]
//    Case #2: Outer NODE(including ROOT) [  ][  ][  ][LW][RW]  Split Occurs   -----  
//    Case #3: ROOT                       [LP][MP][  ][LW][  ]                     |  'wrdUP' (wrdlen bytes) 
//    Case #4: Inner NODE(including ROOT) [LP][MP][RP][LW][RW]  Split Occurs   --- |   &
//                                                                               | |  'PseudoLinkedPointerNEW' (ptr to NEW LEAF)
//    There are total 2 situations for PARENT LEAF: <-------------------------------  ARE GOING UP
//    Case #3: [LP][MP][  ][LW][  ]  
//    Case #4: [LP][MP][RP][LW][RW]  Split Occurs

// ~  First deal separately with the OUTER NODE(LEAF) where Search stopped, i.e. Case #1 & Case #2:
        POffsetInLEAF = BSTstack[--StackPtr];
        PseudoLinkedPointer = BSTstack[--StackPtr];
// NOTE: A LEAF IS FULL ONLY WHEN THE LAST KEY CELL (here RW) IS OCCUPIED!
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // If LEAF is full: Case #2
    { SplitOccured = 1; WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
            // Allocate NEW LEAF:
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
                      {
                      memcpy( &PseudoLinkedPointerNEW, &bufend[LetterOffset], 4 );
                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
      if (POffsetInLEAF == 0) // wrd < LW
         {
                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4, wrdlen ); // LW up
                      memcpy( PseudoLinkedPointer+4+4+4, wrd, wrdlen );   // wrd go to OLD LEAF
                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
         }
      if (POffsetInLEAF == 4) // LW < wrd < RW
         {
                      memcpy( wrdUP, wrd, wrdlen );                       // wrd up
                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
         }
      if (POffsetInLEAF == 8) // wrd > RW
         {
                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW up
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
                      memcpy( PseudoLinkedPointerNEW+4+4+4, wrd, wrdlen );   // wrd go to NEW LEAF
         }
    }
else // If LEAF is not full: Case #1
    { SplitOccured = 0; WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
      if (POffsetInLEAF == 0) // wrd < [LW][] so [LW][] -> [][LW] -> [wrd][LW]
         {
                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, PseudoLinkedPointer+4+4+4, wrdlen );
                      memcpy( PseudoLinkedPointer+4+4+4, wrd, wrdlen );
         }
      if (POffsetInLEAF == 4) // wrd > [LW][] so [LW][] -> [LW][wrd]
         {
                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, wrd, wrdlen );
         }
    }

if (SplitOccured != 0)
{
// ~  Second deal with the INNER NODE(S) i.e Case #3 & Case #4:
        while (StackPtr != 0 || SplitOccured != 0)
              {
        // 'PseudoLinkedPointerNEW' is new LEAF to be inserted
        // 'wrdUP' is NEW word to be inserted
        if (StackPtr != 0)
        {
           POffsetInLEAF = BSTstack[--StackPtr];
           PseudoLinkedPointer = BSTstack[--StackPtr];
if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // If LEAF is full: Case #4
    { SplitOccured = 1;
                      memcpy( wrdUPold, wrdUP, wrdlen ); // LW up
                      PseudoLinkedPointerNEWold = PseudoLinkedPointerNEW;
            // Allocate NEW LEAF:
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
                      {
                      memcpy( &PseudoLinkedPointerNEW, &bufend[LetterOffset], 4 );
                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
      if (POffsetInLEAF == 0) // wrdUPold < LW
         {
                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4, wrdlen ); // LW up
                      memcpy( PseudoLinkedPointer+4+4+4, wrdUPold, wrdlen );   // wrdUPold go to OLD LEAF
                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
                      // [LP](PseudoLinkedPointerNEWold)[MP][RP](wrdUPold)[LW][RW]       ------
                      //              pair [LW] PseudoLinkedPointerNEW goes up                |
                      // PseudoLinkedPointer:                         PseudoLinkedPointerNEW: |
                      // [LP](PseudoLinkedPointerNEWold)[](wrdUPold)  [MP][RP][][RW]      <----
                      // no need to put zero in RP because logic is based on words existence:
                      memcpy( PseudoLinkedPointerNEW+0, PseudoLinkedPointer+4, 4 );  
                      memcpy( PseudoLinkedPointerNEW+4, PseudoLinkedPointer+8, 4 );  
                      memcpy( PseudoLinkedPointer+4, &PseudoLinkedPointerNEWold, 4 );
         }
      if (POffsetInLEAF == 4) // LW < wrdUPold < RW
         {
                      memcpy( wrdUP, wrdUPold, wrdlen );                       // wrdUPold up
                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
                      // [LP][MP](PseudoLinkedPointerNEWold)[RP][LW](wrdUPold)[RW]       ------
                      //              pair [wrdUPold] PseudoLinkedPointerNEW goes up          |
                      // PseudoLinkedPointer:  PseudoLinkedPointerNEW:                        |
                      // [LP][MP][][LW]        (PseudoLinkedPointerNEWold)[RP][][RW]      <----
                      // no need to put zero in RP because logic is based on words existence:
                      memcpy( PseudoLinkedPointerNEW+0, &PseudoLinkedPointerNEWold, 4 );  
                      memcpy( PseudoLinkedPointerNEW+4, PseudoLinkedPointer+8, 4 );  
         }
      if (POffsetInLEAF == 8) // wrdUPold > RW
         {
                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW up
                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
                      memcpy( PseudoLinkedPointerNEW+4+4+4, wrdUPold, wrdlen );   // wrdUPold go to NEW LEAF
                      // [LP][MP][RP](PseudoLinkedPointerNEWold)[LW][RW](wrdUPold)       ------
                      //              pair [RW] PseudoLinkedPointerNEW goes up                |
                      // PseudoLinkedPointer:  PseudoLinkedPointerNEW:                        |
                      // [LP][MP][][LW]        [RP](PseudoLinkedPointerNEWold)[](wrdUPold) <---
                      // no need to put zero in RP because logic is based on words existence:
                      memcpy( PseudoLinkedPointerNEW+0, PseudoLinkedPointer+8, 4 );  
                      memcpy( PseudoLinkedPointerNEW+4, &PseudoLinkedPointerNEWold, 4 );
         }
    }
else // If LEAF is not full: Case #3
    { SplitOccured = 0; 
      if (POffsetInLEAF == 0) // wrdUP < [LW][] so [LW][] -> [][LW] -> [wrdUP][LW]
         {
                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, PseudoLinkedPointer+4+4+4, wrdlen );
                      memcpy( PseudoLinkedPointer+4+4+4, wrdUP, wrdlen );
                              // [LP][MP][] -> [LP][][MP] -> [LP][np][MP]
                      memcpy( PseudoLinkedPointer+8, PseudoLinkedPointer+4, 4 );  
                      memcpy( PseudoLinkedPointer+4, &PseudoLinkedPointerNEW, 4 );
         }
      if (POffsetInLEAF == 4) // wrdUP > [LW][] so [LW][] -> [LW][wrdUP]
         {
                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, wrdUP, wrdlen );
                              // [LP][MP][] -> [LP][MP][np]
                      memcpy( PseudoLinkedPointer+8, &PseudoLinkedPointerNEW, 4 );
         }
           break; 
    }
        }
        else // Empty stack means ROOT and more over ROOT is already splitted(Case #4 is off)
        {
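     // The old ROOT has just been split, so the tree grows by one level: the new ROOT is a non-full LEAF (Case #3 shape).
     // THIS IS WHERE THAT NEW ROOT LEAF 'PseudoLinkedPointerROOT' (holding only 'wrdUP' as LW) must be allocated: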
            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
                      {
                      memcpy( &PseudoLinkedPointerROOT, &bufend[LetterOffset], 4 );
                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
                      memcpy( bufend[LetterOffset], wrdUP, wrdlen ); 
                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
}
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
            // Here                    --  'PseudoLinkedPointerROOT' --
            //                         |           (wrdUP)            |
            // 'PseudoLinkedPointer' <--                              --> 'PseudoLinkedPointerNEW'
            //         (LW)                                                          (RW)
            memcpy( PseudoLinkedPointerROOT, &PseudoLinkedPointer, 4 );      // LP
            memcpy( PseudoLinkedPointerROOT+4, &PseudoLinkedPointerNEW, 4 ); // MP
            // Here must NEW ROOT be updated i.e. HASH table(SLOT) must point it:
            memcpy( BufStart+Slot, &PseudoLinkedPointerROOT, 4 );
           break; //because it is ROOT without split
        }
              } // while
} //if (SplitOccured != 0)
// 3] Insert Iterative ]
} //if (FoundInLinkedList == 0)
// ########################################## B-tree order 3 ]
} //if (BSTorBtree == 0)
} else { //if (BSTorBtree != 2) {
	// External Btrees [

	// _ ASCII code 095 
	// ` ASCII code 096  |
	// a ASCII code 097  | In total 26+1+1 radix instead of 27 to avoid +1 for each '_', code 096 not used.
	// z ASCII code 122
	// The hash  for 'a_quadruplet_for_example' will be calculated for first 5 chars:
	// = (byte1-'_')*28*28*28*28 + (byte2-'_')*28*28*28 + (byte3-'_')*28*28 + (byte4-'_')*28 + (byte5-'_')
	// Hash slots are 28*28*28*28*28 = 17,210,368 each containing one 64bit pointer i.e. 8bytes in length.
	// Hash size = 17,210,368*8 = 137,682,944 bytes
	// When at end all these slots(17,210,368- Btrees) are traversed the outcome is a sorted wordlist - no need of sorting.

// D:\_KAZE_new-stuff\Leprechaun_quadrupleton_r14_minus>Leprechaun_quadrupleton.exe GRAFFITH_2048.lst GRAFFITH_2048.wrd z
// Leprechaun(Fast Greedy Word-Ripper), rev. 14_minus_quadrupleton, written by Svalqyatchx.
// Leprechaun: 'Oh, well, didn't you hear? Bigger is good, but jumbo is dear.'
// Kaze: Let's see what a 3-way hash + 6,602,752 Binary-Search-Trees can give us,
//       also the performance of a 3-way hash + 6,602,752 B-Trees of order 3,
//       also the performance of a 1-way hash + 17,210,368 external B-Trees of order 3.
// Size of input file with files for Leprechauning: 42140
// Allocating HASH memory 137,683,009 bytes ... OK
// Allocating/ZEROing 1,047,566 bytes swap file ... OK
// Size of Input TEXTual file: 33,470,581
// |; Word count: 3,045,077 of them 0 distinct; Done: 64/64
// ...
// Size of Input TEXTual file: 17,403,406
// /; Word count: 2,710,601,882 of them 0 distinct; Done: 64/64
// Bytes per second performance: 17,694,246B/s
// Words per second performance: 1,730,907W/s
// Leprechaun: Done.
// 
// Leprechaun report:
// Number Of Trees(GREATER THE BETTER): 1,646,004

// TODO (long overdue): finally make the sort stage at the end unnecessary - only traverse-and-dump!

	BufStart = pointerflush;
//	Slot = ((wrd[0]-'_')*28*28*28*28 + (wrd[1]-'_')*28*28*28 + (wrd[2]-'_')*28*28 + (wrd[3]-'_')*28 + (wrd[4]-'_'))<<3;
//	Slot = FNV1A_Hash_Jesteress_27bit(wrd, wrdlen)<<3; // Commented since r.14++++ because of passes.
	Slot = FNV1A_Hash_Jesteress_27bit(wrd, wrdlen);

// Bug fix for all r.14+++ and below! [
memcpy( &wrd[(LongestLineInclusive+1+4)-4], &NULLsForWRD, 4 );
// Bug fix for all r.14+++ and below! ]

// Example: HashInBITS-HashChunkSizeInBITS=2
//          HashInBITS = 5
//          HashChunkSizeInBITS = 3
//          RipPasses = 1<<(HashInBITS-HashChunkSizeInBITS) i.e. 1<<2 which is 4 i.e. 32 slots with 4 passes 8 slots each.
//          00??? 5bits 0-7
//          01??? 5bits 8-15
//          10??? 5bits 16-23
//          11??? 5bits 24-31
if ( (Slot>>HashChunkSizeInBITS) == RipPasses ) {
	Slot = Slot<<3;

	//Slot = 0; // One Tree only!
	memcpy( &PseudoLinkedPointer_64, BufStart+Slot, 8 );

// ########################################## B-tree order 3 fragment 64bit [
//
// LEAF structure: [LeftPointer][MiddlePointer][RightPointer][LeftWord][RightWord]
//                  4bytes       4bytes         4bytes        wrdlen    wrdlen
//                                                            *         *          <- if *(char *)==0 means the word cell is empty
// ALL B-tree order 3 fragment consists of 3 sub-fragments:
// 1] Search 2] if Search failed Trasirascht(pushing in stack PseudoLinkedPointer(visited LEAFs)) Search 3] Insert Iterative

// LEAF_64 structure: [LeftPointer][MiddlePointer][RightPointer][LeftWord]               [RightWord]
//                     8bytes       8bytes         8bytes        LongestLineInclusive+1+4 LongestLineInclusive+1+4
//                                                               *         *          <- if *(char *)==0 means the word cell is empty
// Note: So that a single fread (and strcmp) suffices, LeftWord and RightWord each carry a NULL postfix, i.e. LeftWord_Length=len(LeftWord)+1 - a kinda stupid choice ...
// Note: BufEnd_64 is in fact the first free position, i.e. one past the end of the LEAFs written so far!

// 1] Search [
                    if (PseudoLinkedPointer_64 == 0) // means EMPTY-SLOT
                    {
if ( REUSE != 2 ) { //  REUSE [ // This line comes since r16
if (DoNotInsertFlag == 0)
{ // This line comes since r15FIXFIX+ [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[
            //if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
            //          {
            //          memcpy( BufStart+Slot, &bufend[LetterOffset], 4 );
            //          bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
            //          memcpy( bufend[LetterOffset], wrd, wrdlen ); WORDcountDistinct++; bufNumberOfWords[LetterOffset]++;
            //          bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
            //          if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
            //          }
            if( 8 + 8 + 8 + 2*(LongestLineInclusive+1+4) < size_in64_L14 - (BufEnd_64-(unsigned long long)pointerflush_64) ) // the longest wrdlen is LongestLineInclusive but actual is LongestLineInclusive(+ CR char)
                      {
                        memcpy( BufStart+Slot, &BufEnd_64, 8 );
			BufEnd_64 = BufEnd_64 + 8 + 8 + 8;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &BufEnd_64);
		       	fwrite(wrd, wrdlen, 1, fp_outRG); WORDcountDistinct++;
		       	//fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			// r.14++ The above line was commented because the pool is already ZEROed.
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)BufEnd_64, wrd, wrdlen ); WORDcountDistinct++;
				} // ########## 64bit memory manipulations ]
			BufEnd_64 = BufEnd_64 + 2*(LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &BufEnd_64);
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Allocated memory: %s bytes\n", _ui64toaKAZEcomma(size_in64_L14, llTOaDigits, 10) );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
              fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
                      FoundInLinkedList = 1;
} // This line comes since r15FIXFIX+ ]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]
}
                    }
                    else // means USED-SLOT
                    { FoundInLinkedList = 0;
                StackPtr = 0;
//                      while (PseudoLinkedPointer != 0 && FoundInLinkedList == 0)
                      while (PseudoLinkedPointer_64 != 0 && FoundInLinkedList == 0)
                      { 
// ***** 'P W P' section [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS LW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
//                        if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) > 0) // go LP
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&FourGramL[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fread(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( &LEAF[0], (char *)PseudoLinkedPointerAUX_64, 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
		  	memcpy( &FourGramL[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
			// ]  //r.14+
                        if (strcmpKAZE13(FourGramL, wrd) > 0) // go LP
//                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 0, 4 ); //LP
//                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
//                           }
                           { 
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 0; //LP
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer_64; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 0; ++StackPtr; //LPoffset=0;MPoffset=8;RPoffset=16;
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointer_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &PseudoLinkedPointer_64, &LEAF[0], 8 );
			// ]  //r.14+
                           }
//                        else if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) < 0) // go RP or MP
                        else if (strcmpKAZE13(FourGramL, wrd) < 0) // go RP or MP
                           { // RW existence check - line below:
//                             if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // RW exists
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4); //RW
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	//fread(&SomeByte, 1, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &SomeByte, &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], 1 );
			// ]  //r.14+
			if (SomeByte != 0 ) // RW exists
                                { // Here all 'P W P' section is repeated; the way of handling case when dynamic number of words in leaf
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ***** 'P W P' section 2 [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS RW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
//                        if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) > 0) // go MP
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&FourGramL[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &FourGramL[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
			// ]  //r.14+
                        if (strcmpKAZE13(FourGramL, wrd) > 0) // go MP
//                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
//                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
//                           }
                           { 
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8; //MP
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer_64; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 8; ++StackPtr; //LPoffset=0;MPoffset=8;RPoffset=16;
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointer_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &PseudoLinkedPointer_64, &LEAF[8], 8 );
			// ]  //r.14+
                           }
//                        else if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) < 0) // go RP
                        else if (strcmpKAZE13(FourGramL, wrd) < 0) // go RP
//                           { // No ?W after RW - go RP
//                             memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4 + 4, 4 ); //RP
//                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
//                           }
                           { 
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8; //RP
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer_64; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 16; ++StackPtr; //LPoffset=0;MPoffset=8;RPoffset=16;
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointer_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &PseudoLinkedPointer_64, &LEAF[8 + 8], 8 );
			// ]  //r.14+
                           }
                        else { FoundInLinkedList = 1; // wrd is RW
			// Counter [
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
				}
			memcpy( &CounterOccurrencies, &FourGramL[(LongestLineInclusive+1+4)-4], 4 );

				// r16 [
				if ( REUSE == 2 ) { 
				if (*argv[k_FIX] == 'W')
					fprintf(fp_out, "%s\t%s\r\n", _ui64toaKAZEzerocomma(CounterOccurrencies+1, llTOaDigits2, 10)+(26-9), wrd); //WORDcountBOTTOM++;
				if (*argv[k_FIX] == 'w')
					fprintf(fp_out, "%s\r\n", wrd); //WORDcountBOTTOM++;
				}
				// r16 ]

				if ( REUSE != 2 ) {  // r16
			if (CounterOccurrencies<9999999) CounterOccurrencies++;
			memcpy( &FourGramL[(LongestLineInclusive+1+4)-4], &CounterOccurrencies, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fwrite(&FourGramL[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &FourGramL[0], (LongestLineInclusive+1+4) );
				if (BSTorBtree == 2) {
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
				} // r16
			// Counter ]
			     }
                        WORDcountAttemptsToPut++;
// ***** 'P W P' section 2 ]
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
                                }
                             else // RW empty - go MP
//                                { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
//                                  PseudoLinkedPointer = PseudoLinkedPointerNEW;
//                                }
                                { 
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8; //MP
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer_64; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 8; ++StackPtr; //LPoffset=0;MPoffset=8;RPoffset=16;
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointer_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &PseudoLinkedPointer_64, &LEAF[8], 8 );
			// ]  //r.14+
                                }
                           }
                        else { FoundInLinkedList = 1; // wrd is LW
			// Counter [
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
				}
			memcpy( &CounterOccurrencies, &FourGramL[(LongestLineInclusive+1+4)-4], 4 );

				// r16 [
				if ( REUSE == 2 ) { 
				if (*argv[k_FIX] == 'W')
					fprintf(fp_out, "%s\t%s\r\n", _ui64toaKAZEzerocomma(CounterOccurrencies+1, llTOaDigits2, 10)+(26-9), wrd); //WORDcountBOTTOM++;
				if (*argv[k_FIX] == 'w')
					fprintf(fp_out, "%s\r\n", wrd); //WORDcountBOTTOM++;
				}
				// r16 ]

				if ( REUSE != 2 ) {  // r16
			if (CounterOccurrencies<9999999) CounterOccurrencies++;
			memcpy( &FourGramL[(LongestLineInclusive+1+4)-4], &CounterOccurrencies, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//fwrite(&FourGramL[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &LEAF[8 + 8 + 8], &FourGramL[0], (LongestLineInclusive+1+4) );
				if (BSTorBtree == 2) {
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
				} // r16
			// Counter ]
			     }
                        WORDcountAttemptsToPut++;
// ***** 'P W P' section ]
                      } // while
                        WORDcountAttemptsToPut--; // - 1 due to BST way of counting i.e. direct hash hit is not counted only successors
                    }
// 1] Search ]

if ( REUSE != 2 ) { //  REUSE [ // This line comes since r16
if (DoNotInsertFlag == 0) { // This line comes since r15FIXFIX+
if (FoundInLinkedList == 0)
{
/*
// ======================= [ The whole section/sub-fragment 2 is commented out because of the large time difference between Internal_vs_External memory accesses - it is far cheaper to carry the STACK overhead (moved to sub-fragment 1) ] ======================= [
// 2] if Search failed Trasirascht(pushing in stack PseudoLinkedPointer(visited LEAFs)) Search [
//    'TracingSearch' is the same as 'Search' except that adds the trail in my simulated stack,
//    the goal is not to waste time in 'Search' by dealing with no needed trail in case of not 'Insert'.
//    Simulated stack contains pairs of 'Address of ParentLEAF' + 'Offset of ParentPointer in ParentLEAF i.e. 0 for LP, 4 for MP, 8 for RP'.
//    'Offset ...' saves unnecessary comparisons of NEWword which after splitting goes up.
                    memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
                StackPtr = 0;
                      while (PseudoLinkedPointer != 0)
                      { 
// ***** 'P W P' section [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS LW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) > 0) // go LP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 0, 4 ); //LP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 0; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4,wrd,wrdlen) < 0) // go RP or MP
                           { // RW existence check - line below:
                             if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // RW exists
                                { // Here all 'P W P' section is repeated; the way of handling case when dynamic number of words in leaf
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// ***** 'P W P' section 2 [
// LW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4) != 0 )
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        // here ALWAYS RW exists: no need for existence check - line below
                        // if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
                        if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) > 0) // go MP
                           { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 4; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else if (memcmp(PseudoLinkedPointer+4+4+4+wrdlen,wrd,wrdlen) < 0) // go RP
                           { // No ?W after RW - go RP
                             memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4 + 4, 4 ); //RP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 8; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                             PseudoLinkedPointer = PseudoLinkedPointerNEW;
                           }
                        else FoundInLinkedList = 1; // wrd is RW
// ***** 'P W P' section 2 ]
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
                                }
                             else // RW empty - go MP
                                { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer + 4, 4 ); //MP
                if (StackPtr > 8192*3-1-1) { printf( "\nLeprechaun: Failure! 'B-tree order 3' simulated stack overflow!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; //pt to visited leaf
                BSTstack[StackPtr] = 4; ++StackPtr; //LPoffset=0;MPoffset=4;RPoffset=8;
                                  PseudoLinkedPointer = PseudoLinkedPointerNEW;
                                }
                           }
                        else FoundInLinkedList = 1; // wrd is LW
// ***** 'P W P' section ]
                      } // while
// 2] if Search failed Trasirascht(pushing in stack PseudoLinkedPointer(visited LEAFs)) Search ]
// ======================= [ The whole section/sub-fragment 2 is commented out because of the large time difference between Internal_vs_External memory accesses - it is far cheaper to carry the STACK overhead (moved to sub-fragment 1) ] ======================= ]
*/

// 3] Insert Iterative [
//    There are total 4 situations:
//    Case #1: Outer NODE(including ROOT) [  ][  ][  ][LW][  ]
//    Case #2: Outer NODE(including ROOT) [  ][  ][  ][LW][RW]  Split Occurs   -----  
//    Case #3: ROOT                       [LP][MP][  ][LW][  ]                     |  'wrdUP' (wrdlen bytes) 
//    Case #4: Inner NODE(including ROOT) [LP][MP][RP][LW][RW]  Split Occurs   --- |   &
//                                                                               | |  'PseudoLinkedPointerNEW' (ptr to NEW LEAF)
//    There are total 2 situations for PARENT LEAF: <-------------------------------  ARE GOING UP
//    Case #3: [LP][MP][  ][LW][  ]  
//    Case #4: [LP][MP][RP][LW][RW]  Split Occurs

// ~  First deal separately with the OUTER NODE (LEAF) where Search stopped, i.e. Case #1 & Case #2:
        POffsetInLEAF = BSTstack[--StackPtr];
        PseudoLinkedPointer_64 = BSTstack[--StackPtr];
// NOTE: A LEAF IS FULL ONLY WHEN ITS LAST KEY CELL (here RW) EXISTS!
// RW: existence check if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 )
//if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // If LEAF is full: Case #2
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4); //RW
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	//fread(&SomeByte, 1, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fread(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( &LEAF[0], (char *)PseudoLinkedPointerAUX_64, 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
		  	memcpy( &SomeByte, &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], 1 );
			// ]  //r.14+
			if (SomeByte != 0 ) // RW exists
    { SplitOccured = 1; WORDcountDistinct++;
            // Allocate NEW LEAF:
//            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
//                      {
//                      memcpy( &PseudoLinkedPointerNEW, &bufend[LetterOffset], 4 );
//                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
//                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
//                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
//                      }
            if( 8 + 8 + 8 + 2*(LongestLineInclusive+1+4) < size_in64_L14 - (BufEnd_64-(unsigned long long)pointerflush_64) ) // the longest wrdlen is LongestLineInclusive but actual is LongestLineInclusive(+ CR char)
                      {
			PseudoLinkedPointerNEW_64 = BufEnd_64;
			BufEnd_64 = BufEnd_64 + 8 + 8 + 8;
			BufEnd_64 = BufEnd_64 + 2*(LongestLineInclusive+1+4);
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
	      fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
	      fprintf( fp_outLOG, "Allocated memory: %s bytes\n", _ui64toaKAZEcomma(size_in64_L14, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
	      fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
      if (POffsetInLEAF == 0) // wrd < LW
         {
//                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4, wrdlen ); // LW up
//                      memcpy( PseudoLinkedPointer+4+4+4, wrd, wrdlen );   // wrd go to OLD LEAF
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	   //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdUP[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8], &wrd[0], (LongestLineInclusive+1+4) );
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
			// The write of wrdAUX is deliberately reordered here: the OLD/current LEAF is written first, so we avoid seeking to the NEW position and then needlessly seeking back to the OLD/current position!
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			  PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
				if (BSTorBtree == 2) {
			  fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &wrdAUX[0], (LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
      if (POffsetInLEAF == 8) // LW < wrd < RW
         {
//                      memcpy( wrdUP, wrd, wrdlen );                       // wrd up
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
                        memcpy( wrdUP, wrd, (LongestLineInclusive+1+4) );     // wrd up
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	   //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
			// The write of wrdAUX is deliberately reordered here: the OLD/current LEAF is written first, so we avoid seeking to the NEW position and then needlessly seeking back to the OLD/current position!
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			  PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
				if (BSTorBtree == 2) {
			  fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &wrdAUX[0], (LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
      if (POffsetInLEAF == 16) // wrd > RW
         {
//                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW up
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, wrd, wrdlen );   // wrd go to NEW LEAF
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	 //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdUP[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one! Here NO need!
			  PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
				if (BSTorBtree == 2) {
			  fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  fwrite(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &wrd[0], (LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one! Here NO need!
         }
    }
else // If LEAF is not full: Case #1
    { SplitOccured = 0; WORDcountDistinct++;
      if (POffsetInLEAF == 0) // wrd < [LW][] so [LW][] -> [][LW] -> [wrd][LW]
         {
//                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, PseudoLinkedPointer+4+4+4, wrdlen );
//                      memcpy( PseudoLinkedPointer+4+4+4, wrd, wrdlen );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &wrdAUX[0], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8], &wrd[0], (LongestLineInclusive+1+4) );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+

         }
      if (POffsetInLEAF == 8) // wrd > [LW][] so [LW][] -> [LW][wrd]
         {
//                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, wrd, wrdlen );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one! Here NO need!
			  PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
				if (BSTorBtree == 2) {
			  fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  fwrite(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &wrd[0], (LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one! Here NO need!
         }
    }

if (SplitOccured != 0)
{
// ~  Second deal with the INNER NODE(S) i.e Case #3 & Case #4:
        while (StackPtr != 0 || SplitOccured != 0)
              {
        // 'PseudoLinkedPointerNEW' is new LEAF to be inserted
        // 'wrdUP' is NEW word to be inserted
        if (StackPtr != 0)
        {
           POffsetInLEAF = BSTstack[--StackPtr];
           PseudoLinkedPointer_64 = BSTstack[--StackPtr];
//if ( *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) != 0 ) // If LEAF is full: Case #4
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4); //RW
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	//fread(&SomeByte, 1, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fread(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( &LEAF[0], (char *)PseudoLinkedPointerAUX_64, 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
		  	memcpy( &SomeByte, &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], 1 );
			// ]  //r.14+
			if (SomeByte != 0 ) // RW exists
    { SplitOccured = 1;
//                      memcpy( wrdUPold, wrdUP, wrdlen ); // LW up
//                      PseudoLinkedPointerNEWold = PseudoLinkedPointerNEW;
                        memcpy( wrdUPold, wrdUP, (LongestLineInclusive+1+4) );
                        PseudoLinkedPointerNEWold_64 = PseudoLinkedPointerNEW_64;
            // Allocate NEW LEAF:
//            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
//                      {
//                      memcpy( &PseudoLinkedPointerNEW, &bufend[LetterOffset], 4 );
//                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
//                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
//                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
//                      }
            if( 8 + 8 + 8 + 2*(LongestLineInclusive+1+4) < size_in64_L14 - (BufEnd_64-(unsigned long long)pointerflush_64) ) // the longest wrdlen is LongestLineInclusive but actual is LongestLineInclusive(+ CR char)
                      {
			PseudoLinkedPointerNEW_64 = BufEnd_64;
			BufEnd_64 = BufEnd_64 + 8 + 8 + 8;
			BufEnd_64 = BufEnd_64 + 2*(LongestLineInclusive+1+4);
			// [  //r.14+
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
			// In fact the three lines above are slow; all that is needed is a zeroed LEAFNEW.
			memset(&LEAFNEW[0],0,8+8+8+2*(LongestLineInclusive+1+4));
			// ]  //r.14+
                      }
            else
            { printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
	      fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
	      fprintf( fp_outLOG, "Allocated memory: %s bytes\n", _ui64toaKAZEcomma(size_in64_L14, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
	      fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
      if (POffsetInLEAF == 0) // wrdUPold < LW
         {
//                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4, wrdlen ); // LW up
//                      memcpy( PseudoLinkedPointer+4+4+4, wrdUPold, wrdlen );   // wrdUPold go to OLD LEAF
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
//                      // [LP](PseudoLinkedPointerNEWold)[MP][RP](wrdUPold)[LW][RW]       ------
//                      //              pair [LW] PseudoLinkedPointerNEW goes up                |
//                      // PseudoLinkedPointer:                         PseudoLinkedPointerNEW: |
//                      // [LP](PseudoLinkedPointerNEWold)[](wrdUPold)  [MP][RP][][RW]      <----
//                      // no need to put zero in RP because logic is based on words existence:
//                      memcpy( PseudoLinkedPointerNEW+0, PseudoLinkedPointer+4, 4 );  
//                      memcpy( PseudoLinkedPointerNEW+4, PseudoLinkedPointer+8, 4 );  
//                      memcpy( PseudoLinkedPointer+4, &PseudoLinkedPointerNEWold, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&wrdUPold[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	   //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 0;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 16;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fread(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&PseudoLinkedPointerNEWold_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdUP[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8], &wrdUPold[0], (LongestLineInclusive+1+4) );
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAFNEW[8 + 8 + 8], &wrdAUX[0], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
		  	memcpy( &PseudoLinkedPointerAUXdumbo_64, &LEAF[8], 8 );
		  	memcpy( &LEAFNEW[0], &PseudoLinkedPointerAUXdumbo_64, 8 );
		  	memcpy( &PseudoLinkedPointerAUXdumbo_64, &LEAF[16], 8 );
		  	memcpy( &LEAFNEW[8], &PseudoLinkedPointerAUXdumbo_64, 8 );
		  	memcpy( &LEAF[8], &PseudoLinkedPointerNEWold_64, 8 );
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
      if (POffsetInLEAF == 8) // LW < wrdUPold < RW
         {
//                      memcpy( wrdUP, wrdUPold, wrdlen );                       // wrdUPold up
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW go to NEW LEAF
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
//                      // [LP][MP](PseudoLinkedPointerNEWold)[RP][LW](wrdUPold)[RW]       ------
//                      //              pair [wrdUPold] PseudoLinkedPointerNEW goes up          |
//                      // PseudoLinkedPointer:  PseudoLinkedPointerNEW:                        |
//                      // [LP][MP][][LW]        (PseudoLinkedPointerNEWold)[RP][][RW]      <----
//                      // no need to put zero in RP because logic is based on words existence:
//                      memcpy( PseudoLinkedPointerNEW+0, &PseudoLinkedPointerNEWold, 4 );  
//                      memcpy( PseudoLinkedPointerNEW+4, PseudoLinkedPointer+8, 4 );  
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
                        memcpy( wrdUP, wrdUPold, (LongestLineInclusive+1+4) );
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	   //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 0;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&PseudoLinkedPointerNEWold_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 16;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fread(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAFNEW[8 + 8 + 8], &wrdAUX[0], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
		  	memcpy( &LEAFNEW[0], &PseudoLinkedPointerNEWold_64, 8 );
		  	memcpy( &PseudoLinkedPointerAUXdumbo_64, &LEAF[16], 8 );
		  	memcpy( &LEAFNEW[8], &PseudoLinkedPointerAUXdumbo_64, 8 );
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
      if (POffsetInLEAF == 16) // wrdUPold > RW
         {
//                      memcpy( wrdUP, PseudoLinkedPointer+4+4+4+wrdlen, wrdlen ); // RW up
//                      *(char *)(PseudoLinkedPointer+4+4+4+wrdlen) = 0;    // RW mark unused in OLD LEAF
//                      memcpy( PseudoLinkedPointerNEW+4+4+4, wrdUPold, wrdlen );   // wrdUPold go to NEW LEAF
//                      // [LP][MP][RP](PseudoLinkedPointerNEWold)[LW][RW](wrdUPold)       ------
//                      //              pair [RW] PseudoLinkedPointerNEW goes up                |
//                      // PseudoLinkedPointer:  PseudoLinkedPointerNEW:                        |
//                      // [LP][MP][][LW]        [RP](PseudoLinkedPointerNEWold)[](wrdUPold) <---
//                      // no need to put zero in RP because logic is based on words existence:
//                      memcpy( PseudoLinkedPointerNEW+0, PseudoLinkedPointer+8, 4 );  
//                      memcpy( PseudoLinkedPointerNEW+4, &PseudoLinkedPointerNEWold, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		       	 //fwrite(&OneChar_ieByte, 1, 1, fp_outRG); // Write ZERO ASCII code
			  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8 + 8 + 8;
			  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			  //fwrite(&wrdUPold[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 16;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 0;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&PseudoLinkedPointerNEWold_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdUP[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &OneChar_ieByte, 1 );
		  	memcpy( &LEAFNEW[8 + 8 + 8], &wrdUPold[0], (LongestLineInclusive+1+4) );
		  	memcpy( &PseudoLinkedPointerAUXdumbo_64, &LEAF[16], 8 );
		  	memcpy( &LEAFNEW[0], &PseudoLinkedPointerAUXdumbo_64, 8 );
		  	memcpy( &LEAFNEW[8], &PseudoLinkedPointerNEWold_64, 8 );
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointerNEW_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAFNEW[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
    }
else // If LEAF is not full: Case #3
    { SplitOccured = 0; 
      if (POffsetInLEAF == 0) // wrdUP < [LW][] so [LW][] -> [][LW] -> [wrdUP][LW]
         {
//                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, PseudoLinkedPointer+4+4+4, wrdlen );
//                      memcpy( PseudoLinkedPointer+4+4+4, wrdUP, wrdlen );
//                              // [LP][MP][] -> [LP][][MP] -> [LP][np][MP]
//                      memcpy( PseudoLinkedPointer+8, PseudoLinkedPointer+4, 4 );  
//                      memcpy( PseudoLinkedPointer+4, &PseudoLinkedPointerNEW, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&wrdAUX[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fread(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 16;
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&PseudoLinkedPointerAUXdumbo_64, 8, 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&PseudoLinkedPointerNEW_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrdAUX[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &wrdAUX[0], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8 + 8], &wrdUP[0], (LongestLineInclusive+1+4) );
		  	memcpy( &PseudoLinkedPointerAUXdumbo_64, &LEAF[8], 8 );
		  	memcpy( &LEAF[8 + 8], &PseudoLinkedPointerAUXdumbo_64, 8 );
		  	memcpy( &LEAF[8], &PseudoLinkedPointerNEW_64, 8 );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
      if (POffsetInLEAF == 8) // wrdUP > [LW][] so [LW][] -> [LW][wrdUP]
         {
//                      memcpy( PseudoLinkedPointer+4+4+4+wrdlen, wrdUP, wrdlen );
//                              // [LP][MP][] -> [LP][MP][np]
//                      memcpy( PseudoLinkedPointer+8, &PseudoLinkedPointerNEW, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
			//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			//fwrite(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 16;
			 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			 //fwrite(&PseudoLinkedPointerNEW_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], &wrdUP[0], (LongestLineInclusive+1+4) );
		  	memcpy( &LEAF[8 + 8], &PseudoLinkedPointerNEW_64, 8 );
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
			// ]  //r.14+
         }
           break; 
    }
        }
        else // Empty stack means ROOT and more over ROOT is already splitted(Case #4 is off)
        {
     // If LEAF is not full: Case #3
     // THIS IS WHERE A NEW(SECOND) LEAF 'PseudoLinkedPointerROOT' must be allocated:
//            if( (unsigned long)(bufend[LetterOffset] - BufStart) + 2*wrdlen + 4 + 4 + 4 < GRMBLFoolAgain[(int)wrdlen] ) // +4 more for BST instead of LL; + more(see LEAF)
//                      {
//                      memcpy( &PseudoLinkedPointerROOT, &bufend[LetterOffset], 4 );
//                      bufend[LetterOffset] = bufend[LetterOffset] + 4 + 4 + 4; // + 4 due to above commenting
//                      memcpy( bufend[LetterOffset], wrdUP, wrdlen ); 
//                      bufend[LetterOffset] = bufend[LetterOffset] + 2*wrdlen;
//                      if (MAXusedBuffer[wrdlen] < (unsigned long)(bufend[LetterOffset] - BufStart)) {MAXusedBuffer[wrdlen] = (unsigned long)(bufend[LetterOffset] - BufStart);}
//                      }
            // Allocate a brand-new ROOT leaf at the current end of the pool (BufEnd_64) and store wrdUP
            // as its first (left) word. A leaf occupies 8+8+8 bytes for three pointers followed by two
            // word cells of (LongestLineInclusive+1+4) bytes each. The guard below compares that leaf size
            // against the space still unused: size_in64_L14 minus the bytes consumed since pointerflush_64.
            // Only the first word cell is written here; the pointer fields and the second word cell are not
            // touched in this block.
            // NOTE(review): presumably the pool is zero-filled beforehand so the untouched cells read as empty - confirm.
            if( 8 + 8 + 8 + 2*(LongestLineInclusive+1+4) < size_in64_L14 - (BufEnd_64-(unsigned long long)pointerflush_64) ) // the longest wrdlen is LongestLineInclusive but actual is LongestLineInclusive(+ CR char)
                      {
			PseudoLinkedPointerROOT_64 = BufEnd_64; // the new ROOT leaf starts where the pool currently ends
			BufEnd_64 = BufEnd_64 + 8 + 8 + 8; // reserve the three 8-byte pointer fields
			BufEnd_64 = BufEnd_64 + 2*(LongestLineInclusive+1+4); // reserve the two word cells
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointerROOT_64 + 8 + 8 + 8; // offset/address of the first word cell
				if (BSTorBtree == 2) {
			// External (file-backed) mode: the value is used as a position inside fp_outRG.
			// NOTE(review): passing this variable to fsetpos relies on fpos_t behaving like a plain integer offset - not portable, confirm for the target toolchain.
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fwrite(&wrdUP[0], (LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			// In-memory mode: the same value is treated as a raw address.
			memcpy( (char *)PseudoLinkedPointerAUX_64, &wrdUP[0], (LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
                      }
            else
            { // Not enough room for one more leaf: report on the console, record the statistics in the log and abort with exit code 1.
              printf( "\nLeprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n" );
	      fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
	      fprintf( fp_outLOG, "Allocated memory: %s bytes\n", _ui64toaKAZEcomma(size_in64_L14, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
	      fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
	      fprintf( fp_outLOG, "Leprechaun: Failure! Increment 'Memory for each Letter' parameter(third one)!\n\n");
              return( 1 );
            }
            // Here                    --  'PseudoLinkedPointerROOT' --
            //                         |           (wrdUP)            |
            // 'PseudoLinkedPointer' <--                              --> 'PseudoLinkedPointerNEW'
            //         (LW)                                                          (RW)
//            memcpy( PseudoLinkedPointerROOT, &PseudoLinkedPointer, 4 );      // LP
//            memcpy( PseudoLinkedPointerROOT+4, &PseudoLinkedPointerNEW, 4 ); // MP
//            // Here must NEW ROOT be updated i.e. HASH table(SLOT) must point it:
//            memcpy( BufStart+Slot, &PseudoLinkedPointerROOT, 4 );
	      PseudoLinkedPointerAUX_64 = PseudoLinkedPointerROOT_64;
				if (BSTorBtree == 2) {
	      fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
	      fwrite(&PseudoLinkedPointer_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &PseudoLinkedPointer_64, 8 );
				} // ########## 64bit memory manipulations ]
	       PseudoLinkedPointerAUX_64 = PseudoLinkedPointerROOT_64 + 8;
				if (BSTorBtree == 2) {
	       fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
	       fwrite(&PseudoLinkedPointerNEW_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &PseudoLinkedPointerNEW_64, 8 );
				} // ########## 64bit memory manipulations ]
		memcpy( BufStart+Slot, &PseudoLinkedPointerROOT_64, 8 );
           break; //because it is ROOT without split
        }
              } // while
} //if (SplitOccured != 0)
// 3] Insert Iterative ]
} //if (FoundInLinkedList == 0)
} //if (DoNotInsertFlag == 0) { // This line comes since r15FIXFIX+
} //if ( REUSE != 2 ) { //  REUSE [ // This line comes since r16
// ########################################## B-tree order 3 64bit ]
} //if ( (Slot>>HashChunkSizeInBITS) == RipPasses ) {

	// External Btrees ]
} //if (BSTorBtree != 2) {
                        } // if ( ( PLE_words == 4 ) && ( wrdlen <= 31 ) ) {
                        } // if( 1 <= wrdlen && wrdlen <= 31 )
/*
The Most Stupid Bug I have ever made [
			else {
			PLE_words_INITflag = 1; // This line fixes the stupidity done since first quadrupleton r1, namely to initialize the sequence when a word longer than 31 is spotted - it is wrong to slide it.
			}
The Most Stupid Bug I have ever made ]
*/
//r.15fix [
//This line was intended BUT placed before the fragment 'if( 1 <= wrdlen && wrdlen <= 31 )' [
//                      if( wrdlen > 31 ) PLE_words_INITflag = 1; // Not when wrdlen == 0 as the above buggy fragment!
//This line was intended BUT placed before the fragment 'if( 1 <= wrdlen && wrdlen <= 31 )' ]
//r.15fix ]
if ( PLE_words_INITflag == 1 ) { PLE_words = 0; PLE_words_INITflag = 0; } // Quadruple!
			wrdlen = 0;
                        // This fragment is MIRRORed: #1 copy ]
                }
                //else if( workbyte >= 'A' &&  workbyte <= 'Z' )
                else if( workbyte <= 'Z' )
		{
                        if( wrdlen < 31 )
                        //if( wrdlen < LongestLineInclusive )
                        { wrd[ wrdlen ] = workbyte + 32 ; }
                        wrdlen++;
		}
                else if( workbyte >= 'a' &&  workbyte <= 'z' )
		{
                        if( wrdlen < 31 )
                        //if( wrdlen < LongestLineInclusive )
                        { wrd[ wrdlen ] = workbyte; }
                        wrdlen++;
		}
		else
                {
                        // This fragment is MIRRORed: #2 copy [
                goto ElStupido;
                        // This fragment is MIRRORed: #2 copy ]
		}
        } // i 'for'
        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//++Melnitchka;
//Melnitchka = Melnitchka % 4;
//if (Melnitchka == 0){ printf( "|; Word count: %s of them %s distinct; Done: %lu/64\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), 64 ); }
//if (Melnitchka == 1){ printf( "/; Word count: %s of them %s distinct; Done: %lu/64\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), 64 ); }
//if (Melnitchka == 2){ printf( "-; Word count: %s of them %s distinct; Done: %lu/64\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), 64 ); }
//if (Melnitchka == 3){ printf( "\\; Word count: %s of them %s distinct; Done: %lu/64\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), 64 ); }
// Per-file progress line: a spinner glyph taken from Auberge[], the phrases-per-second rate so far,
// and the running total/distinct phrase counters.
Melnitchka = Melnitchka & 3; // keep the spinner index in 0..3 (00 01 10 11)
				(void) time(&t4);
				if (t4 <= t1) {t4 = t1; t4++;} // force at least one elapsed second so the division below cannot be by zero
// The last argument is consumed by "%lu", so it must be an unsigned long: a plain int literal (64)
// is a format/argument mismatch and therefore undefined behaviour.
printf( "%s; %sP/s; Phrase count: %s of them %s distinct; Done: %lu/64\n", Auberge[Melnitchka++], _ui64toaKAZEzerocomma(WORDcount/((int) t4-t1), llTOaDigits3, 10)+(26-10), _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10), 64UL );

fclose( fp_inLINE );
} // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IT IS a FILENAME not a METACOMMAND ]
        LINE10len = 0;
LINE10[ LINE10len ] = 0;
                  }
                }
        } // k 'for'

// Report throughput of the insertion phase (t1 = start, t3 = now).
(void) time(&t3);
if (t3 <= t1) {t3 = t1; t3++;} // force at least one elapsed second so the divisions below cannot be by zero
printf( "Bytes per second performance: %sB/s\n", _ui64toaKAZEcomma(FilesLEN/((int) t3-t1), llTOaDigits, 10) );  // Rev. 12+
printf( "Phrases per second performance: %sP/s\n", _ui64toaKAZEcomma(WORDcount/((int) t3-t1), llTOaDigits, 10) );  // Rev. 12+
// "(int) t3-t1" casts only t3, so the difference keeps t1's type; if that is a time_t wider than int
// it does not match "%d". Cast the whole difference instead.
printf("Time for putting phrases into trees: %d second(s)\n", (int)(t3-t1));

if (BSTorBtree < 2) {
        // FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH FLUSH
printf("Flushing unsorted words ...\n");
if( ( fp_out = fopen( argv[2], "wb+" ) ) == NULL )
{ printf( "Leprechaun: Can't create file %s \n", argv[2] ); return( 1 ); }
ZEROS[0] = 0; ZEROS[1] = 0; ZEROS[2] = 0; ZEROS[3] = 0;
CRdLFa[0] = 13; CRdLFa[1] = 10;

for( i = 0; i < 806; i++ )
{ //BufStart = pointerflush + i * LetterBuffer; // OLD
  BufStart = pointerflush + (i / 31) * WHOLEletter_BufferSize + OffsetsInBuffer[i % 31];
//  for( j = 0; j < NumberOfSLOTs; j++ )
//  { 
//                      Slot = j<<2;
//                      memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
//                      while (PseudoLinkedPointer != 0)
//                      { memcpy( &PseudoLinkedPointerNEW, PseudoLinkedPointer, 4 );
//                        memcpy( PseudoLinkedPointer, ZEROS, 4 );
//                        PseudoLinkedPointer = PseudoLinkedPointerNEW;
//                      } 
//  }
//  // Start of COUPLES [OFFSET: 4byte(ZEROS)][WORD:up to 31bytes]
//  //fwrite(BufStart+(NumberOfSLOTs+1)*4, bufend[i] - (BufStart+(NumberOfSLOTs+1)*4), 1, fp_out );
//  /* Follows STATE OF UGLINESS: */
//  Flushing = BufStart+(NumberOfSLOTs+1)*4 + 4; // '+ 4' in order to skip first 4 zeros
//  //in case of current buffer not have been used then NOT entering in this cycle
//  while(Flushing < bufend[i])
//  { if (*Flushing != 0) {fwrite(Flushing, 1, 1, fp_out ); TotalWLchars++;}
//    // Below 'Flushing-1' works due to skipped first 4 zeros!
//    if (*(Flushing-1) != 0 && *Flushing == 0) {fwrite(CRdLFa, 2, 1, fp_out);}
//    //last word must be suffixed with 1310 too
//    if (Flushing == bufend[i]-1) {fwrite(CRdLFa, 2, 1, fp_out );}
//    Flushing++;
//  }

  for( j = 0; j < NumberOfSLOTs; j++ )
  { 
                      Slot = j<<2;
                      memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
                      if (PseudoLinkedPointer != 0)
                      {
NumberOfTrees++;
if (BSTorBtree == 0)
{
// =========================================================== BST traverse [
     // DONE JOB:
     // Must be written BST traverse ! with simulated stack i.e. non-recursive.
     // ...
                                        // /*
                                        // Given a binary search tree, print out
                                        // its data elements in increasing
                                        // sorted order.
                                        // */
                                        // void printTree(struct node* node) {
                                        // if (node == NULL) return;
                                        // printTree(node->left);
                                        // printf("%d ", node->data);
                                        // printTree(node->right);
                                        // }

     // FUTURE JOB:
     // I need functions:
     //                  BST_LeafNumber() // greater the better
     //                  BST_NodeNumber() // 'BSTcurrent' below
     //                  BST_Peak()       // i.e. levels, root has height = 1
     //                  BST_PeakIB()     // IBBST(Ideal Balanced BST) has 1 + lgNodeNumber height
     // I need 'Ideal Balancing BST FRAGMENT' with simulated stack:
     // I need 'Ideal Balancing BST FRAGMENT' to be executed when Peak() >= PeakIB()<<1:

// ---------------------------------------------------------------------- [
  BSTcurrentNode = 0; BSTcurrentPeak = 0; BSTcurrentLeaf = 0;
                      BSTcurrentPeakMAX = 0; // Height of current BST
  StackPtr = 0;
  // Non-recursive traversal of one BST using BSTstack as an explicit stack.
  // Node layout as read below: [left ptr: 4 bytes][right ptr: 4 bytes][word: i%31+1 bytes].
  // Descent phase: follow RIGHT links, pushing four entries per node (word address, right ptr, left ptr, level).
  // Visit phase: pop one node, write its word plus CRdLFa to fp_out, then continue with its LEFT subtree.
  // The resulting order is therefore right subtree, node, left subtree.
  while ( 2==2 ) {
        while (PseudoLinkedPointer != 0)
              { 
                // Four entries are pushed per node, hence the extra "-3" in the bound.
                // NOTE(review): assumes BSTstack holds 8192*3 entries - confirm against its declaration.
                if (StackPtr > 8192*3-1-3) { printf( "\nLeprechaun: Failure! BST simulated stack overflow, too high BST!\n" ); return( 13 );}
                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 ); // left child pointer at node+0
                PseudoLinkedPointer = PseudoLinkedPointer + 4;
                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer, 4 ); // right child pointer at node+4
                BSTstack[StackPtr] = PseudoLinkedPointer + 4; ++StackPtr; // address of the word at node+8
                BSTstack[StackPtr] = PseudoLinkedPointerNEWright; ++StackPtr; // kept for later use by the code that pops it
                BSTstack[StackPtr] = PseudoLinkedPointerNEWleft; ++StackPtr; // popped later and used as the next subtree to descend into
        // BST stats [
                  // A node with no children counts as a leaf, both for this tree and for the global total.
                  if (PseudoLinkedPointerNEWleft == 0 && PseudoLinkedPointerNEWright == 0) {BSTcurrentLeaf++; BSTsTotalLEAFs++;}
                  BSTcurrentPeak++; // level of the node just entered (root is 1)
                  if (BSTcurrentPeakMAX < BSTcurrentPeak) BSTcurrentPeakMAX = BSTcurrentPeak;
                BSTstack[StackPtr] = BSTcurrentPeak; ++StackPtr; // level saved so it can be restored when this node is popped
        // BST stats ]
                PseudoLinkedPointer = PseudoLinkedPointerNEWright; // choose right instead of 'PseudoLinkedPointerNEWleft' because of stats print
              }
        if (StackPtr == 0) break; // nothing left to visit: the whole tree has been written
        // Pop in the reverse order of the pushes above.
        BSTcurrentPeak = BSTstack[--StackPtr]; // level of the node(1 is root) needed only for stats(print)
        PseudoLinkedPointerNEWleft = BSTstack[--StackPtr]; // left pointer: becomes the next subtree to traverse (see below)
        PseudoLinkedPointerNEWright = BSTstack[--StackPtr]; // right pointer: already traversed, not used again in this loop
        memcpy( wrd, BSTstack[--StackPtr], i%31+1 ); // copy the node's word; its length is i%31+1 bytes
        fwrite(wrd, i%31+1, 1, fp_out); TotalWLchars = TotalWLchars + i%31+1;
        fwrite(CRdLFa, 2, 1, fp_out); // two-byte line terminator
        BSTcurrentNode++;
        PseudoLinkedPointer = PseudoLinkedPointerNEWleft; // continue with the left subtree of the node just written
  }                                          
// ---------------------------------------------------------------------- ]
        // BST stats [
        if (BSTwithMAXnode < BSTcurrentNode) {
           BSTwithMAXnode = BSTcurrentNode;
           BSTwithMAXnodePEAK = BSTcurrentPeakMAX;
           BSTwithMAXnodeLEAF = BSTcurrentLeaf;
           BSTcurrentNodeMAXqUANTITY = 0;
        }
        if (BSTwithMAXnode == BSTcurrentNode) BSTcurrentNodeMAXqUANTITY++;
        if (BSTwithMAXpeak < BSTcurrentPeakMAX) {
           BSTwithMAXpeak = BSTcurrentPeakMAX;
           BSTwithMAXpeakNODE = BSTcurrentNode;
           BSTwithMAXpeakLEAF = BSTcurrentLeaf;
           BSTcurrentPeakMAXqUANTITY = 0;       iBSTwithMAXpeak=i; jBSTwithMAXpeak=j;
        }
        if (BSTwithMAXpeak == BSTcurrentPeakMAX) BSTcurrentPeakMAXqUANTITY++;
        if (BSTwithMAXleaf < BSTcurrentLeaf) {
           BSTwithMAXleaf = BSTcurrentLeaf;
           BSTwithMAXleafNODE = BSTcurrentNode;
           BSTwithMAXleafPEAK = BSTcurrentPeakMAX;
           BSTcurrentLeafMAXqUANTITY = 0; 
        }
        if (BSTwithMAXleaf == BSTcurrentLeaf) BSTcurrentLeafMAXqUANTITY++; 
        // BST stats ]
// =========================================================== BST traverse ]
} else
{
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ B-tree order 3 traverse [
     // DONE JOB:
     // Must be written B-tree traverse ! with simulated stack i.e. non-recursive.
     // ...
  StackPtr = 0;
  // Non-recursive, DESTRUCTIVE traversal of one B-tree of order 3 using BSTstack as an explicit stack.
  // Node layout as read below: [LP: 4 bytes][MP: 4 bytes][RP: 4 bytes][left word: i%31+1 bytes][right word: i%31+1 bytes].
  // Descent phase: push the node, pick the first non-zero child pointer scanning right -> middle -> left,
  // overwrite that pointer inside the node with the 4 bytes at BufStart[NumberOfSLOTs*4], then descend into it.
  // NOTE(review): those 4 bytes are presumably zeros (the cell right after the slot table) - confirm.
  // Visit phase: pop a node; if all three of its pointers are now zero its word(s) are written to fp_out,
  // otherwise the inner loop runs again on the same node and takes its next remaining child.
  // A node's words are therefore written only after all of its subtrees have been written.
  while ( 2==2 ) {
        while (PseudoLinkedPointer != 0)
              { 
                // One entry is pushed per visit of a node.
                // NOTE(review): assumes BSTstack holds 8192*3 entries - confirm against its declaration.
                if (StackPtr > 8192*3-1) { printf( "\nLeprechaun: Failure! B-tree simulated stack overflow, too high B-tree!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer; ++StackPtr; // address of the node itself (its LP field)
// If the right word cell starts with a zero byte (no right word), overwrite RP before it is read,
// so the right-pointer field is not followed for such a node.
if ( *(char *)(PseudoLinkedPointer + 4 + 4 + 4 + i%31+1) == 0 ) {memcpy( PseudoLinkedPointer + 4 + 4, &BufStart[NumberOfSLOTs*4], 4 );}
                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 );
                memcpy( &PseudoLinkedPointerNEWmiddle, PseudoLinkedPointer + 4, 4 );
                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer + 4 + 4, 4 );
// Give first from right to left non-zero PTR
                if (PseudoLinkedPointerNEWright !=0 )
                { memcpy( PseudoLinkedPointer + 4 + 4, &BufStart[NumberOfSLOTs*4], 4 ); // mark the right child as taken
                  PseudoLinkedPointer = PseudoLinkedPointerNEWright;
                }
                else if (PseudoLinkedPointerNEWmiddle !=0 )
                { memcpy( PseudoLinkedPointer + 4, &BufStart[NumberOfSLOTs*4], 4 ); // mark the middle child as taken
                  PseudoLinkedPointer = PseudoLinkedPointerNEWmiddle;
                }
                else if (PseudoLinkedPointerNEWleft !=0 )
                { memcpy( PseudoLinkedPointer, &BufStart[NumberOfSLOTs*4], 4 ); // mark the left child as taken
                  PseudoLinkedPointer = PseudoLinkedPointerNEWleft;
                }
                else
                {
                  PseudoLinkedPointer = 0; // no children left: leave the descent loop and pop this node
                }
              }
        if (StackPtr == 0) break; // stack exhausted: the whole tree has been written
        PseudoLinkedPointer = BSTstack[--StackPtr];
                // Re-read the pointers, since the descent above may have overwritten some of them.
                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 );
                memcpy( &PseudoLinkedPointerNEWmiddle, PseudoLinkedPointer + 4, 4 );
                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer + 4 + 4, 4 );
        // NOTE(review): "all three are zero" is tested via their sum; verify the sum cannot wrap to 0 for non-zero pointers.
        if (PseudoLinkedPointerNEWleft+PseudoLinkedPointerNEWmiddle+PseudoLinkedPointerNEWright == 0) // One LEAF is PRINTED when LP=0 MP=0 RP=0
           { 
        // The left word is always written, each word followed by the two-byte CRdLFa terminator.
        memcpy( wrd, PseudoLinkedPointer + 4 + 4 + 4, i%31+1 );
        fwrite(wrd, i%31+1, 1, fp_out); TotalWLchars = TotalWLchars + i%31+1;
        fwrite(CRdLFa, 2, 1, fp_out); 
        // The right word is written only when its cell does not start with a zero byte.
        if ( *(char *)(PseudoLinkedPointer + 4 + 4 + 4 + i%31+1) != 0 )
             { memcpy( wrd, PseudoLinkedPointer + 4 + 4 + 4 + i%31+1, i%31+1 );
               fwrite(wrd, i%31+1, 1, fp_out); TotalWLchars = TotalWLchars + i%31+1;
               fwrite(CRdLFa, 2, 1, fp_out); 
             }
        PseudoLinkedPointer = 0; // node finished: skip the descent loop so the next iteration pops the parent
           }
        // Otherwise PseudoLinkedPointer still addresses this node, and the descent loop revisits it for its next child.
  }                                          
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ B-tree order 3 traverse ]
}
                      }
  } // j

} // i

if (BSTorBtree == 0)
{
// ~~~~~ Longest path ~~~~~ [
i=iBSTwithMAXpeak; j=jBSTwithMAXpeak;
  BufStart = pointerflush + (i / 31) * WHOLEletter_BufferSize + OffsetsInBuffer[i % 31];
                      Slot = j<<2;
                      memcpy( &PseudoLinkedPointer, BufStart+Slot, 4 );
                      if (PseudoLinkedPointer != 0)
                      {
// ---------------------------------------------------------------------- [
  BSTcurrentNode = 0; BSTcurrentPeak = 0; BSTcurrentLeaf = 0;
                      BSTcurrentPeakMAX = 0; // Height of current BST
  StackPtr = 0;
        // BST print [
                  fprintf( fp_outLOG, "A(not always THE) Binary-Search-Tree with the longest path(height, PEAK, number of levels):\n" );
        // BST print ]
  while ( 2==2 ) {
        while (PseudoLinkedPointer != 0)
              { 
                if (StackPtr > 8192*3-1-3) { printf( "\nLeprechaun: Failure! BST simulated stack overflow, too high BST!\n" ); return( 13 );}
                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 );
                PseudoLinkedPointer = PseudoLinkedPointer + 4;
                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer, 4 );
                BSTstack[StackPtr] = PseudoLinkedPointer + 4; ++StackPtr; //ptr to wrd
                BSTstack[StackPtr] = PseudoLinkedPointerNEWright; ++StackPtr;
                BSTstack[StackPtr] = PseudoLinkedPointerNEWleft; ++StackPtr; //needed for stats not for recursion
        // BST stats [
                  if (PseudoLinkedPointerNEWleft == 0 && PseudoLinkedPointerNEWright == 0) {BSTcurrentLeaf++;} //BSTsTotalLEAFs++;} // REMOVED to avoid mess in TOTAL stats
                  BSTcurrentPeak++;
                  if (BSTcurrentPeakMAX < BSTcurrentPeak) BSTcurrentPeakMAX = BSTcurrentPeak;
                BSTstack[StackPtr] = BSTcurrentPeak; ++StackPtr; //needed for stats not for recursion
        // BST stats ]
                PseudoLinkedPointer = PseudoLinkedPointerNEWright; // choose right instead of 'PseudoLinkedPointerNEWleft' because of stats print
              }
        if (StackPtr == 0) break;
        BSTcurrentPeak = BSTstack[--StackPtr]; // level of the node(1 is root) needed only for stats(print)
        PseudoLinkedPointerNEWleft = BSTstack[--StackPtr]; // left pointer needed only for stats(print)
        PseudoLinkedPointerNEWright = BSTstack[--StackPtr]; // right pointer
        memcpy( wrd, BSTstack[--StackPtr], i%31+1 );
        //fwrite(wrd, i%31+1, 1, fp_out); TotalWLchars = TotalWLchars + i%31+1;
        //fwrite(CRdLFa, 2, 1, fp_out);
        BSTcurrentNode++;
        PseudoLinkedPointer = PseudoLinkedPointerNEWleft;
        // BST print [
                  for( k = 0; k < BSTcurrentPeak; k++ ) fprintf( fp_outLOG, "%c", ' ' );
                     if (PseudoLinkedPointerNEWleft == 0) fprintf( fp_outLOG, "[" ); else fprintf( fp_outLOG, "]" ); 
                  for( k = 0; k < i%31+1; k++ ) fprintf( fp_outLOG, "%c", *(char *)(wrd+k) );
                     if (PseudoLinkedPointerNEWright == 0) fprintf( fp_outLOG, "]" ); else fprintf( fp_outLOG, "[" );
                     if (BSTcurrentPeak == 1) fprintf( fp_outLOG, " ROOT" );
                     fprintf( fp_outLOG, "\n" );
        // BST print ]
  }                                          
// ---------------------------------------------------------------------- ]
                      }
fprintf( fp_outLOG, "Above Binary-Search-Tree with MaxPEAK = %s has NODEs = %s and LEAFs = %s\n", _ui64toaKAZEcomma(BSTwithMAXpeak, llTOaDigits2, 10), _ui64toaKAZEcomma(BSTwithMAXpeakNODE, llTOaDigits3, 10), _ui64toaKAZEcomma(BSTwithMAXpeakLEAF, llTOaDigits, 10));
                  fprintf( fp_outLOG, "Legend:\n" );
                  fprintf( fp_outLOG, "At left side of the word - '[' means no left successor\n" );
                  fprintf( fp_outLOG, "At left side of the word - ']' means left successor exists\n" );
                  fprintf( fp_outLOG, "At right side of the word - ']' means no right successor\n" );
                  fprintf( fp_outLOG, "At right side of the word - '[' means right successor exists\n" );
// ~~~~~ Longest path ~~~~~ ]

        // BST stats [
PEAKibBST=1+floorLog2(BSTwithMAXnode);
//PEAKibBST=1;
//while (BSTwithMAXnode>>PEAKibBST) PEAKibBST++;
        // BST stats ]
}

(void) time(&t2);
if (t2 <= t1) {t2 = t1; t2++;}
printf("Time for making unsorted wordlist: %d second(s)\n", (int) t2-t1);
fprintf( fp_outLOG, "Bytes per second performance: %sB/s\n", _ui64toaKAZEcomma(FilesLEN/((int) t3-t1), llTOaDigits, 10) );  // Rev. 12+
fprintf( fp_outLOG, "Words per second performance: %sW/s\n", _ui64toaKAZEcomma(WORDcount/((int) t3-t1), llTOaDigits, 10) );  // Rev. 12+
fprintf( fp_outLOG, "Input File with a list of TEXTual Files: %s\n", argv[1] );
fprintf( fp_outLOG, "Size of all TEXTual Files: %s\n", _ui64toaKAZEcomma(FilesLEN, llTOaDigits, 10) );
fprintf( fp_outLOG, "Word count: %s of them %s distinct\n", _ui64toaKAZEcomma(WORDcount, llTOaDigits, 10), _ui64toaKAZEcomma((unsigned long long)WORDcountDistinct, llTOaDigits2, 10) );
fprintf( fp_outLOG, "Number Of Files: %lu\n", NumberOfFiles );
fprintf( fp_outLOG, "Number Of Lines: %lu\n", NumberOfLines );
fprintf( fp_outLOG, "Allocated memory in MB: %lu\n", (unsigned long)(memory_size>>20)+1 );
NumberOfHashCollisions = WORDcountDistinct - NumberOfTrees;
fprintf( fp_outLOG, "Number Of Trees(GREATER THE BETTER): %lu\n", NumberOfTrees );
fprintf( fp_outLOG, "Forest population(Hash Function Quality regarding Collisions i.e. Hash Table Utilization): %lu%s\n", (NumberOfTrees*100)/(26*31*8192), "%\0" );
fprintf( fp_outLOG, "Number Of Hash Collisions(Distinct WORDs - Number Of Trees): %lu\n", NumberOfHashCollisions );

if (BSTorBtree == 0)
{
fprintf( fp_outLOG, "Maximum Attempts to Find/Put a WORD into a Binary-Search-Tree: '%s'\n", _ui64toaKAZEcomma(BSTwithMAXpeak, llTOaDigits, 10) );
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into Binary-Search-Trees: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
fprintf( fp_outLOG, "Total Number of LEAFs in Binary-Search-Trees(GREATER THE BETTER): %s\n", _ui64toaKAZEcomma(BSTsTotalLEAFs, llTOaDigits, 10) );
fprintf( fp_outLOG, "Perfectly-Balanced-Binary-Search-Tree for MaxNODEs = %s must have PEAK = %s = rounding down of integer (1+lb(%s))\n", _ui64toaKAZEcomma(BSTwithMAXnode, llTOaDigits, 10), _ui64toaKAZEcomma(PEAKibBST, llTOaDigits2, 10), _ui64toaKAZEcomma(BSTwithMAXnode, llTOaDigits3, 10));
fprintf( fp_outLOG, "Binary-Search-Tree(1st out of %s) with MaxNODEs = %s has PEAK = %s and LEAFs = %s\n", _ui64toaKAZEcomma(BSTcurrentNodeMAXqUANTITY, llTOaDigits4, 10), _ui64toaKAZEcomma(BSTwithMAXnode, llTOaDigits2, 10), _ui64toaKAZEcomma(BSTwithMAXnodePEAK, llTOaDigits3, 10), _ui64toaKAZEcomma(BSTwithMAXnodeLEAF, llTOaDigits, 10));
fprintf( fp_outLOG, "Binary-Search-Tree(1st out of %s) with MaxPEAK = '%s' has NODEs = %s and LEAFs = %s\n", _ui64toaKAZEcomma(BSTcurrentPeakMAXqUANTITY, llTOaDigits4, 10), _ui64toaKAZEcomma(BSTwithMAXpeak, llTOaDigits2, 10), _ui64toaKAZEcomma(BSTwithMAXpeakNODE, llTOaDigits3, 10), _ui64toaKAZEcomma(BSTwithMAXpeakLEAF, llTOaDigits, 10));
fprintf( fp_outLOG, "Binary-Search-Tree(1st out of %s) with MaxLEAFs = %s has NODEs = %s and PEAK = %s\n", _ui64toaKAZEcomma(BSTcurrentLeafMAXqUANTITY, llTOaDigits4, 10), _ui64toaKAZEcomma(BSTwithMAXleaf, llTOaDigits2, 10), _ui64toaKAZEcomma(BSTwithMAXleafNODE, llTOaDigits3, 10), _ui64toaKAZEcomma(BSTwithMAXleafPEAK, llTOaDigits, 10));
} else
{
fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );
}

for( k = 1; k < 32; k++ )
{ fprintf( fp_outLOG, "Words with length %s occupy %sKB of %sKB given i.e. %s%s utilization\n", _ui64toaKAZEzerocomma(k, llTOaDigits, 10)+(26-2), _ui64toaKAZEzerocomma((MAXusedBuffer[k]>>10)+1, llTOaDigits2, 10)+(26-5), _ui64toaKAZEzerocomma((((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1, llTOaDigits3, 10)+(26-5), _ui64toaKAZEzerocomma((unsigned long long)(MAXusedBuffer[k]*100)/((GRMBLhill[(int)k] * LetterBuffer)/31), llTOaDigits4, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
  if ( MAXusedBufferABS < (31 * ((MAXusedBuffer[k]>>10)+1)) / GRMBLhill[(int)k] ) {MAXusedBufferABS = 1+(31 * ((MAXusedBuffer[k]>>10)+1)) / GRMBLhill[(int)k];}
  Utiliza1 = Utiliza1 + (MAXusedBuffer[k]>>10)+1;
  Utiliza2 = Utiliza2 + (((GRMBLhill[(int)k] * LetterBuffer)/31)>>10)+1;
}
fprintf( fp_outLOG, "Total pseudo(including hash table) memory utilization: %s%s\n", _ui64toaKAZEzerocomma((Utiliza1*100)/Utiliza2, llTOaDigits, 10)+(26-2), "%\0" ); // 26 are all 26-DESIRED=24
fprintf( fp_outLOG, "Total real(wordlist's words VS allocated block) memory utilization: %s/1000\n", _i64toaKAZE(((unsigned long long)TotalWLchars*1000)/memory_size, llTOaDigits, 10) ); // 26 are all 26-DESIRED=24
fprintf( fp_outLOG, "Used value for third parameter in KB: %lu\n", (unsigned long)Thunderwith );
fprintf( fp_outLOG, "Use next time as third parameter: %lu-\n", MAXusedBufferABS ); // 26 are all 26-DESIRED=24
fprintf( fp_outLOG, "Time for making unsorted wordlist: %d second(s)\n", (int) t2-t1);

// EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT EXIT
printf( "Deallocated memory in MB: %lu\n", (memory_size>>20)+1 );
free(pointerflushUNALIGN);
fclose(fp_out);
fclose(fp_outLOG);
// SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT
   //printf("Uploading unsorted wordlist ...\n");
   if ((nlines = readlines(argv[2], &backup)) >= 0)
   {  //printf("Number of words(lines) uploaded: %lu\n", nlines);
      //printf("Note1: Press 'Ctrl+C' to abort sorting, unsorted wordlist(second parameter)\n");
      //printf("       will remain intact(unless flushing is in progress) because of\n");
      //printf("       pointers-to-data are being sorted not the data itself.\n");
      //printf("Note2: In near future 'InsertionX26Sort' will be replaced with 'QuickX26Sort':\n");
      //printf("       which is much faster than 'QuickSort' applied for data-at-once!\n");

      // !!!??? I AM DISAPPOINTED x26 is just an illusion !!!???
      // X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26
// argc is 4, 5 or 6 because the BufferSize argument may be omitted
if( argc == 4 ) // not 5 due to eventual missing BufferSize
    k_FIX = 3;
if( argc == 5 || argc == 6 )  
    k_FIX = 4;
if (*argv[k_FIX] != 'A' && *argv[k_FIX] != 'a' && *argv[k_FIX] != 'B' && *argv[k_FIX] != 'b' && *argv[k_FIX] != 'C' && *argv[k_FIX] != 'c' && *argv[k_FIX] != 'D' && *argv[k_FIX] != 'd')
{ printf("Sorting(with 'MultiKeyQuickSortX26Sort' by J. Bentley and R. Sedgewick) ...\n");
      /* ???!!! What an unexpected behavior! I have been hit: for SOED5.HTM(259,835 distinct words): InsertionX26Sort gives 46s, InsertionSort gives 28s */
      for( k = 0; k < 26; k++ )
      { printf( "Sort pass %s/26 ...\r", _ui64toaKAZEzerocomma(k+1, llTOaDigits, 10)+(26-2));
        HEADOffetFromStartBUKVA = TAILOffetFromStartBUKVA;
        while( (TAILOffetFromStartBUKVA < nlines) && (*backup[TAILOffetFromStartBUKVA] - 'a' == k) )
        { TAILOffetFromStartBUKVA++;
        }
        if (HEADOffetFromStartBUKVA != TAILOffetFromStartBUKVA)
        { mkqsort_main(backup + HEADOffetFromStartBUKVA, TAILOffetFromStartBUKVA - HEADOffetFromStartBUKVA + 0); // backup[0..nlines-1]
        }
      }
}
else
{
if (*argv[k_FIX] == 'A' || *argv[k_FIX] == 'a')
   { printf("Sorting(with 'InsertionSort') ...");
     InsertSortKAZE(backup, nlines, 0); // backup[0..nlines-1]
   }
if (*argv[k_FIX] == 'B' || *argv[k_FIX] == 'b')
   { printf("Sorting(with 'InsertionX26Sort') ...\n");
      /* ???!!! What an unexpected behavior! I have been hit: for SOED5.HTM(259,835 distinct words): InsertionX26Sort gives 46s, InsertionSort gives 28s */
      for( k = 0; k < 26; k++ )
      { printf( "Sort pass %s/26 ...\r", _ui64toaKAZEzerocomma(k+1, llTOaDigits, 10)+(26-2));
        HEADOffetFromStartBUKVA = TAILOffetFromStartBUKVA;
        while( (TAILOffetFromStartBUKVA < nlines) && (*backup[TAILOffetFromStartBUKVA] - 'a' == k) )
        { TAILOffetFromStartBUKVA++;
        }
        if (HEADOffetFromStartBUKVA != TAILOffetFromStartBUKVA)
        { InsertSortKAZE(backup + HEADOffetFromStartBUKVA, TAILOffetFromStartBUKVA - HEADOffetFromStartBUKVA + 0, 0); // backup[0..nlines-1]
        }
      }
   }
if (*argv[k_FIX] == 'C' || *argv[k_FIX] == 'c')
   { printf("Sorting(with 'MultiKeyQuickSortSort' by J. Bentley and R. Sedgewick) ...");
     mkqsort_main(backup, nlines); // backup[0..nlines-1]
   }
if (*argv[k_FIX] == 'D' || *argv[k_FIX] == 'd')
   { printf("Sorting(with 'MultiKeyQuickSortX26Sort' by J. Bentley and R. Sedgewick) ...\n");
      /* ???!!! What an unexpected behavior! I have been hit: for SOED5.HTM(259,835 distinct words): InsertionX26Sort gives 46s, InsertionSort gives 28s */
      for( k = 0; k < 26; k++ )
      { printf( "Sort pass %s/26 ...\r", _ui64toaKAZEzerocomma(k+1, llTOaDigits, 10)+(26-2));
        HEADOffetFromStartBUKVA = TAILOffetFromStartBUKVA;
        while( (TAILOffetFromStartBUKVA < nlines) && (*backup[TAILOffetFromStartBUKVA] - 'a' == k) )
        { TAILOffetFromStartBUKVA++;
        }
        if (HEADOffetFromStartBUKVA != TAILOffetFromStartBUKVA)
        { mkqsort_main(backup + HEADOffetFromStartBUKVA, TAILOffetFromStartBUKVA - HEADOffetFromStartBUKVA + 0); // backup[0..nlines-1]
        }
      }
   }
}
      // X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26 X26

      printf("\nFlushing sorted words ...\n");
      if( ( fp_out = fopen( argv[2], "wb+" ) ) == NULL )
      { printf( "Leprechaun: Can't create file %s \n", argv[2] ); return( 1 ); }
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
        fprintf(fp_out, "%s", backup[j]);
        fwrite(CRdLFa, 2, 1, fp_out );
      }
      (void) time(&t3);
      if (t3 <= t2) {t3 = t2; t3++;}
      printf("Time for sorting unsorted wordlist: %d second(s)\n", (int) t3-t2);

/*
// Hash benchmarking ---------------------------------------------- [

// 5[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
              Slot = FNV1A_Hash_4_OCTETS(backup[j], (strlen(backup[j])>>2)); //13+++
      }
}
clocks2 = clock();
printf( "Performance of 'FNV1A_Hash_4_OCTETS': %lu words/clock or %lu MB/s\n", (nlines/((long)(clocks2 - clocks1 + 1)>>4)) , ((TotalWLchars>>10)/((long)(clocks2 - clocks1 + 1)>>4))  );
// 5]

// 1[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
// To make it EVEN !!!
//wrdlen = strlen(backup[j]);
//if (strlen(backup[j]) != 0)
              Slot = FNV1A_Hash(backup[j]); //13+++
      }
}
clocks2 = clock();
printf( "Performance of 'FNV1A_Hash': %lu words/clock or %lu MB/s\n", (nlines/((long)(clocks2 - clocks1 + 1)>>4)) , ((TotalWLchars>>10)/((long)(clocks2 - clocks1 + 1)>>4))  );
// 1]

// 2[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
              Slot = FNV1A_Hash_4_OCTETS_31(backup[j], (strlen(backup[j])>>2)); //13+++
      }
}
clocks2 = clock();
printf( "Performance of 'FNV1A_Hash_4_OCTETS_31': %lu words/clock or %lu MB/s\n", (nlines/((long)(clocks2 - clocks1 + 1)>>4)) , ((TotalWLchars>>10)/((long)(clocks2 - clocks1 + 1)>>4))  );
// 2]

// 4[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
// To make it EVEN !!!
//wrdlen = strlen(backup[j]);
//if (strlen(backup[j]) != 0)
              Slot = KuxHash3plus(backup[j]); //13++
      }
}
clocks2 = clock();
printf( "Performance of 'KuxHash3plus': %lu words/clock or %lu MB/s\n", (nlines/((long)(clocks2 - clocks1 + 1)>>4)) , ((TotalWLchars>>10)/((long)(clocks2 - clocks1 + 1)>>4))  );
// 4]

// 6[
clocks1 = clock();
    for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
      for( j = 0; j < nlines; j++ )
      { //Slot = KuxHash3plus(backup[j]);
        //fprintf(fp_out, "Hashcode: %s - ", _ui64toaKAZEzerocomma(Slot, llTOaDigits, 10)+(26-5));
wrdlen = strlen(backup[j]);
if (wrdlen<=19) // 4x4+3=19 i.e. last contains 7 clashes
              Slot = FNV1A_Hash_Granularity(backup[j], wrdlen>>2, 2); //13+++++
else            // 2x8+4=20 i.e. first contains 6 clashes
              Slot = FNV1A_Hash_Granularity(backup[j], wrdlen>>3, 3); //13+++++
      } // Conclusion: two functions > 64 bytes lead to horrible slowness, so unite them in one: fit in the cache line.
}
clocks2 = clock();
printf( "Performance of 'FNV1A_Hash_Granularity': %lu words/clock or %lu MB/s\n", (nlines/((long)(clocks2 - clocks1 + 1)>>4)) , ((TotalWLchars>>10)/((long)(clocks2 - clocks1 + 1)>>4))  );
// 6]

// Hash benchmarking ---------------------------------------------- ]
*/

if( ( fp_outLOG = fopen( "Leprechaun.LOG", "a+" ) ) == NULL )
{ printf( "Leprechaun: Can't open file Leprechaun.LOG.\n" ); return( 1 ); }
fprintf( fp_outLOG, "Time for sorting unsorted wordlist: %d second(s)\n\n", (int) t3-t2);
printf( "Leprechaun: Done.\n" );
      return 0;
   } 
   else 
   {  printf("Leprechaun: Input file too large, wordlist remains unsorted!\n");
      return 1;
   }
// SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT SORT
} else { //if (BSTorBtree != 2) {
	// External Btrees [

// r16 [
	if (REUSE==2) {
		fclose(fp_out); 
	}
// r16 ]

		if ( REUSE == 0 ) { // r16FIX <*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*> [

(void) time(&t1);
WORDcountBOTTOM = 0;
//printf("Flushing UNsorted phrases ...\r");
//if( ( fp_out = fopen( argv[2], "wb+" ) ) == NULL ) // Commented since r.14++++ because passes need concatenation.

if( ( fp_out = fopen( argv[2], "ab+" ) ) == NULL )
{ printf( "Leprechaun: Can't create file %s \n", argv[2] ); return( 1 ); }
CRdLFa[0] = 13; CRdLFa[1] = 10;

	BufStart = pointerflush;
//	for( j = 0; j < 28*28*28*28*28; j++ )
	for( j = 0; j < (1<<HashInBITS); j++ )
	{ 
                                if ((j & ((1<<14)-1)) == 0) {
				(void) time(&t3);
				if (t3 <= t1) {t3 = t1; t3++;}
				printf("Flushing UNsorted phrases: %s%%; Shaking trees performance: %sP/s\r", _ui64toaKAZEzerocomma(((long long)j*100)/((1<<HashInBITS)), llTOaDigits, 10)+(26-3), _ui64toaKAZEzerocomma(WORDcountBOTTOM/((int) t3-t1), llTOaDigits2, 10)+(26-10));
				}
                      Slot = j<<3;
                      memcpy( &PseudoLinkedPointer_64, BufStart+Slot, 8 );
                      if (PseudoLinkedPointer_64 != 0)
                      {
	NumberOfTrees++;

// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ B-tree order 3 traverse 64bit [
     // DONE:
     // B-tree traversal is implemented non-recursively, using a simulated stack (BSTstack).
     // ...
  StackPtr = 0;
  while ( 2==2 ) {
        while (PseudoLinkedPointer_64 != 0)
              { 
                if (StackPtr > 8192*3-1) { printf( "\nLeprechaun: Failure! B-tree simulated stack overflow, too high B-tree!\n" ); return( 13 );}
                BSTstack[StackPtr] = PseudoLinkedPointer_64; ++StackPtr; //ptr to Rwrd
//if ( *(char *)(PseudoLinkedPointer + 4 + 4 + 4 + i%31+1) == 0 ) {memcpy( PseudoLinkedPointer + 4 + 4, &BufStart[NumberOfSLOTs*4], 4 );}
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4); //RW
		//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		//fread(&SomeByte, 1, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fread(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( &LEAF[0], (char *)PseudoLinkedPointerAUX_64, 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
		  	memcpy( &SomeByte, &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], 1 );
			// ]  //r.14+
		if (SomeByte == 0 ) // RW exists not
		 {
		  PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8;
				if (BSTorBtree == 2) {
		  fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		  fwrite(&NULLs_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &NULLs_64, 8 );
				} // ########## 64bit memory manipulations ]
			// [  //r.14+
		  	memcpy( &LEAF[8 + 8], &NULLs_64, 8 );
			// ]  //r.14+
		 }
//                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 );
//                memcpy( &PseudoLinkedPointerNEWmiddle, PseudoLinkedPointer + 4, 4 );
//                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer + 4 + 4, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
		  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		  //fread(&PseudoLinkedPointerNEWleft_64, 8, 1, fp_outRG);
		   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
		   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		   //fread(&PseudoLinkedPointerNEWmiddle_64, 8, 1, fp_outRG);
		    //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8;
		    //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		    //fread(&PseudoLinkedPointerNEWright_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &PseudoLinkedPointerNEWleft_64, &LEAF[0], 8 );
		  	memcpy( &PseudoLinkedPointerNEWmiddle_64, &LEAF[8], 8 );
		  	memcpy( &PseudoLinkedPointerNEWright_64, &LEAF[8+8], 8 );
			// ]  //r.14+
// Descend into the first non-zero pointer, checking right, then middle, then left
                if (PseudoLinkedPointerNEWright_64 !=0 )
//                { memcpy( PseudoLinkedPointer + 4 + 4, &BufStart[NumberOfSLOTs*4], 4 );
//                  PseudoLinkedPointer = PseudoLinkedPointerNEWright;
//                }
                {
		PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8;
				if (BSTorBtree == 2) {
		fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		fwrite(&NULLs_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &NULLs_64, 8 );
				} // ########## 64bit memory manipulations ]
		 PseudoLinkedPointer_64 = PseudoLinkedPointerNEWright_64;
                }
                else if (PseudoLinkedPointerNEWmiddle_64 !=0 )
//                { memcpy( PseudoLinkedPointer + 4, &BufStart[NumberOfSLOTs*4], 4 );
//                  PseudoLinkedPointer = PseudoLinkedPointerNEWmiddle;
//                }
                {
		PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
				if (BSTorBtree == 2) {
		fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		fwrite(&NULLs_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &NULLs_64, 8 );
				} // ########## 64bit memory manipulations ]
		 PseudoLinkedPointer_64 = PseudoLinkedPointerNEWmiddle_64;
                }
                else if (PseudoLinkedPointerNEWleft_64 !=0 )
//                { memcpy( PseudoLinkedPointer, &BufStart[NumberOfSLOTs*4], 4 );
//                  PseudoLinkedPointer = PseudoLinkedPointerNEWleft;
//                }
                {
		PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
		fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		fwrite(&NULLs_64, 8, 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( (char *)PseudoLinkedPointerAUX_64, &NULLs_64, 8 );
				} // ########## 64bit memory manipulations ]
		 PseudoLinkedPointer_64 = PseudoLinkedPointerNEWleft_64;
                }
                else
                {
                  PseudoLinkedPointer_64 = 0;
                }
              }
	    if (LevelsInCorona_Not_Counting_ROOT < StackPtr) LevelsInCorona_Not_Counting_ROOT = StackPtr; //r.14
        if (StackPtr == 0) break;
        PseudoLinkedPointer_64 = BSTstack[--StackPtr];
//                memcpy( &PseudoLinkedPointerNEWleft, PseudoLinkedPointer, 4 );
//                memcpy( &PseudoLinkedPointerNEWmiddle, PseudoLinkedPointer + 4, 4 );
//                memcpy( &PseudoLinkedPointerNEWright, PseudoLinkedPointer + 4 + 4, 4 );
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		  //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
		  //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		  //fread(&PseudoLinkedPointerNEWleft_64, 8, 1, fp_outRG);
		   //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8;
		   //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		   //fread(&PseudoLinkedPointerNEWmiddle_64, 8, 1, fp_outRG);
		    //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8;
		    //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		    //fread(&PseudoLinkedPointerNEWright_64, 8, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
			PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64;
				if (BSTorBtree == 2) {
			fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
			fread(&LEAF[0], 8+8+8+2*(LongestLineInclusive+1+4), 1, fp_outRG);
				} else { // ########## 64bit memory manipulations [
			memcpy( &LEAF[0], (char *)PseudoLinkedPointerAUX_64, 8+8+8+2*(LongestLineInclusive+1+4) );
				} // ########## 64bit memory manipulations ]
		  	memcpy( &PseudoLinkedPointerNEWleft_64, &LEAF[0], 8 );
		  	memcpy( &PseudoLinkedPointerNEWmiddle_64, &LEAF[8], 8 );
		  	memcpy( &PseudoLinkedPointerNEWright_64, &LEAF[8+8], 8 );
			// ]  //r.14+
        if (PseudoLinkedPointerNEWleft_64 + PseudoLinkedPointerNEWmiddle_64 + PseudoLinkedPointerNEWright_64 == 0) // One LEAF is PRINTED when LP=0 MP=0 RP=0
           { 
//        memcpy( wrd, PseudoLinkedPointer + 4 + 4 + 4, i%31+1 );
//        fwrite(wrd, i%31+1, 1, fp_out);
//        fwrite(CRdLFa, 2, 1, fp_out); 
//        if ( *(char *)(PseudoLinkedPointer + 4 + 4 + 4 + i%31+1) != 0 )
//             { memcpy( wrd, PseudoLinkedPointer + 4 + 4 + 4 + i%31+1, i%31+1 );
//               fwrite(wrd, i%31+1, 1, fp_out);
//               fwrite(CRdLFa, 2, 1, fp_out); 
//             }
//        PseudoLinkedPointer = 0;
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		//PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8;
		//fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		//fread(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrd[0], &LEAF[8 + 8 + 8], (LongestLineInclusive+1+4) );
			// ]  //r.14+
			// Counter [
			memcpy( &CounterOccurrencies, &wrd[(LongestLineInclusive+1+4)-4], 4 );
			if (CounterOccurrencies<9999999) CounterOccurrencies++; // Starting from ZERO! Because when insertion happened there was no setting counter to 1.
			// Counter ]
if (*argv[k_FIX] == 'Y' || *argv[k_FIX] == 'Z') // || *argv[k_FIX] == 'W'
	        fprintf(fp_out, "%s\t%s\r\n", _ui64toaKAZEzerocomma(CounterOccurrencies, llTOaDigits2, 10)+(26-9), wrd); WORDcountBOTTOM++;
if (*argv[k_FIX] == 'y' || *argv[k_FIX] == 'z') // || *argv[k_FIX] == 'w'
	        fprintf(fp_out, "%s\r\n", wrd); WORDcountBOTTOM++;
        	//fwrite(CRdLFa, 2, 1, fp_out); 
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4); //RW
		 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		 //fread(&SomeByte, 1, 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &SomeByte, &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], 1 );
			// ]  //r.14+
		 if (SomeByte != 0 ) // RW exists
		 {
			// [  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
		 //PseudoLinkedPointerAUX_64 = PseudoLinkedPointer_64 + 8 + 8 + 8 + (LongestLineInclusive+1+4);
		 //fsetpos(fp_outRG, &PseudoLinkedPointerAUX_64);
		 //fread(&wrd[0], (LongestLineInclusive+1+4), 1, fp_outRG);
			// ]  //r.14+ Optimized I/O i.e. reading a LEAF at once not LEAF's elements one-by-one!
			// [  //r.14+
		  	memcpy( &wrd[0], &LEAF[8 + 8 + 8 + (LongestLineInclusive+1+4)], (LongestLineInclusive+1+4) );
			// ]  //r.14+
			// Counter [
			memcpy( &CounterOccurrencies, &wrd[(LongestLineInclusive+1+4)-4], 4 );
			if (CounterOccurrencies<9999999) CounterOccurrencies++; // Starting from ZERO! Because when insertion happened there was no setting counter to 1.
			// Counter ]
if (*argv[k_FIX] == 'Y' || *argv[k_FIX] == 'Z') // || *argv[k_FIX] == 'W'
   	         fprintf(fp_out, "%s\t%s\r\n", _ui64toaKAZEzerocomma(CounterOccurrencies, llTOaDigits2, 10)+(26-9), wrd); WORDcountBOTTOM++;
if (*argv[k_FIX] == 'y' || *argv[k_FIX] == 'z') // || *argv[k_FIX] == 'w'
   	         fprintf(fp_out, "%s\r\n", wrd); WORDcountBOTTOM++;
        	 //fwrite(CRdLFa, 2, 1, fp_out); 
		 }
		  PseudoLinkedPointer_64 = 0;
	    NumberOfLEAFs++; //r.14
           }
  }                                          
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ B-tree order 3 traverse 64bit ]

                      }
	}

fclose(fp_out);
(void) time(&t2);
if (t2 <= t1) {t2 = t1; t2++;}
printf("Flushing UNsorted phrases: %s%%; Shaking trees performance: %sP/s\n", _ui64toaKAZEzerocomma(100, llTOaDigits, 10)+(26-3), _ui64toaKAZEzerocomma(WORDcountBOTTOM/((int) t2-t1), llTOaDigits2, 10)+(26-10));
printf("Time for shaking phrases from trees: %d second(s)\n", (int) t2-t1);

// TO DO ...
//printf( "Leprechaun: THE DUMP NOT SORTED?! HASH sucks also - these two issues ought to be fixed in r.14. This is revision 14-.\n" );

		} //if ( REUSE == 0 ) { // r16FIX <*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*><*> ]

	fprintf( fp_outLOG, "Number Of Hash Collisions(Distinct WORDs - Number Of Trees): %s\n", _ui64toaKAZEcomma(WORDcountDistinct - NumberOfTrees, llTOaDigits, 10) );
	fprintf( fp_outLOG, "Number Of Trees(GREATER THE BETTER): %s\n", _ui64toaKAZEcomma(NumberOfTrees, llTOaDigits, 10) );
	fprintf( fp_outLOG, "Number Of LEAFs(littler THE BETTER) not counting ROOT LEAFs: %s\n", _ui64toaKAZEcomma(NumberOfLEAFs-NumberOfTrees, llTOaDigits, 10) );
	fprintf( fp_outLOG, "Highest Tree not counting ROOT Level i.e. CORONA levels(littler THE BETTER): %s\n", _ui64toaKAZEcomma(LevelsInCorona_Not_Counting_ROOT-1, llTOaDigits, 10) );

	fprintf( fp_outLOG, "Used value for third parameter in KB: %s\n", _ui64toaKAZEcomma(Thunderwith, llTOaDigits, 10) );
	fprintf( fp_outLOG, "Use next time as third parameter: %s\n", _ui64toaKAZEcomma((((BufEnd_64-(unsigned long long)pointerflush_64)+1)>>10)+1, llTOaDigits, 10) );
	fprintf( fp_outLOG, "Total Attempts to Find/Put WORDs into B-trees order 3: %s\n", _ui64toaKAZEcomma(WORDcountAttemptsToPut, llTOaDigits, 10) );

	// External Btrees ]
				if (BSTorBtree == 2) {
	fclose(fp_outRG);
	// r16
	if ( (REUSE == 1) && ((HashInBITS-HashChunkSizeInBITS)==0) ) { // Multiple-passes shouldn't be dumped - it is meaningless, dump when only one pass.
		if( ( fp_outRG = fopen( "Leprechaun_64bit.hsh", "wb+" ) ) == NULL )
		{ printf( "Leprechaun: Can't create file 'Leprechaun_64bit.hsh'.\n" ); return( 1 ); }
		fwrite(pointerflushUNALIGN,  (1<<HashInBITS)*8 + 1 + 64 , 1, fp_outRG);
	fclose(fp_outRG);
	}
				} else { // ########## 64bit memory manipulations [
	free(pointerflushUNALIGN_64);
				} // ########## 64bit memory manipulations ]
	free(pointerflushUNALIGN);
	fclose(fp_outLOG);
	printf( "Leprechaun: Current pass done.\n" );

// 14++++ [
 TotalMemoryNeededForOnePass += (((BufEnd_64-(unsigned long long)pointerflush_64)+1)>>10)+1;
 WORDcountDistinctTOTAL += WORDcountDistinct;
 RipPasses++;
 if (RipPasses <= (1<<(HashInBITS-HashChunkSizeInBITS))-1) goto WhyTheHellForIsNotWorking;
//} // for( RipPasses = 1-1; RipPasses <= (1<<(HashInBITS-HashChunkSizeInBITS))-1; RipPasses++ )
// 14++++ ]
(void) time(&tMainE);
if (tMainE <= tMainB) {tMainE = tMainB; tMainE++;} // This line fixes a bug in r.15
printf( "\nTotal memory needed for one pass: %sKB\n", _ui64toaKAZEcomma(TotalMemoryNeededForOnePass, llTOaDigits2, 10) );
printf( "Total distinct phrases: %s\n", _ui64toaKAZEcomma(WORDcountDistinctTOTAL, llTOaDigits2, 10) );
printf("Total time: %d second(s)\n", (int) tMainE-tMainB);
printf( "Total performance: %sP/s i.e. phrases per second\n", _ui64toaKAZEcomma(WORDcount/((int) tMainE-tMainB), llTOaDigits2, 10) );

	printf( "Leprechaun: Done.\n" );
	exit(0);
} //if (BSTorBtree != 2) {
} // main()

/*
TO BE DONE: Ideal Balancing BST [

link rotR(link h)
  { link x = h->l; h->l = x->r; x->r = h; 
    return x; }

link rotL(link h)
  { link x = h->r; h->r = x->l; x->l = h; 
    return x; }

link partR(link h, int k)
  { int t = h->l->N; 
    if (t > k )
      { h->l = partR(h->l, k); h = rotR(h); }
    if (t < k )
      { h->r = partR(h->r, k-t-1); h = rotL(h); }
    return h;
  }

link balanceR(link h)
  { 
    if (h->N < 2) return h;
    h = partR(h, h->N/2);
    h->l = balanceR(h->l); 
    h->r = balanceR(h->r);
    return h;
  }

TO BE DONE: Ideal Balancing BST ]
*/

/*
#include <stdlib.h>
#include "Item.h"
typedef struct STnode* link;
struct STnode { Item item; link l, r; int N };
static link head, z;
link NEW(Item item, link l, link r, int N)
  { link x = malloc(sizeof *x); 
    x->item = item; x->l = l; x->r = r; x->N = N;
    return x;
  }
void STinit()
  { head = (z = NEW(NULLitem, 0, 0, 0)); }
int STcount() { return head->N; }
Item searchR(link h, Key v)
  { Key t = key(h->item);
    if (h == z) return NULLitem;
    if eq(v, t) return h->item;
    if less(v, t) return searchR(h->l, v);
             else return searchR(h->r, v);
  }
Item STsearch(Key v) 
  { return searchR(head, v); } 
link insertR(link h, Item item)
  { Key v = key(item), t = key(h->item);
    if (h == z) return NEW(item, z, z, 1);
    if less(v, t) 
         h->l = insertR(h->l, item);
    else h->r = insertR(h->r, item);
    (h->N)++; return h;
  }
void STinsert(Item item)
  { head = insertR(head, item); }
*/

/*
int count(link h)
  { 
    if (h == NULL) return 0;
    return count(h->l) + count(h->r) + 1;
  }

int height(link h)
  { int u, v;
    if (h == NULL) return -1;
    u = height(h->l); v = height(h->r);
    if (u > v) return u+1; else return v+1;
  }

void printnode(char c, int h)
  { int i;
    for (i = 0; i < h; i++) printf("  ");
    printf("%c\n", c);
  }

void show(link x, int h)
  { 
    if (x == NULL) { printnode("*", h); return; }
    show(x->r, h+1);    
    printnode(x->item, h);
    show(x->l, h+1);    
  }
*/
