From 5549fc05a07e1e6609df2c6497bacab139c79119 Mon Sep 17 00:00:00 2001
From: "Carsten Haitzler (Rasterman)"
Date: Sun, 5 May 2013 23:10:44 +0900
Subject: [PATCH] add compressed backscroll to terminology...

We get a compression ratio of about 0.35-0.4 on backscroll. Backscroll
memory is also now defragmented and held in mmapped blocks, so when a
block goes away, all the memory it used for backscroll is returned to
the system. (Sketches of the shared row header, the LZ4 round trip,
and the block allocator follow the diff.)
---
 src/bin/Makefile.am   |   1 +
 src/bin/lz4/README    |   7 +
 src/bin/lz4/lz4.c     | 861 ++++++++++++++++++++++++++++++++++++++++++
 src/bin/lz4/lz4.h     | 128 +++++++
 src/bin/termpty.c     |   7 +-
 src/bin/termpty.h     |  31 +-
 src/bin/termptyops.c  |   8 +-
 src/bin/termptysave.c | 351 ++++++++++++++++-
 8 files changed, 1369 insertions(+), 25 deletions(-)
 create mode 100644 src/bin/lz4/README
 create mode 100644 src/bin/lz4/lz4.c
 create mode 100644 src/bin/lz4/lz4.h

diff --git a/src/bin/Makefile.am b/src/bin/Makefile.am
index 9763b748..c6b2779b 100644
--- a/src/bin/Makefile.am
+++ b/src/bin/Makefile.am
@@ -37,6 +37,7 @@ termptyops.c termptyops.h \
 termptygfx.c termptygfx.h \
 termptyext.c termptyext.h \
 termptysave.c termptysave.h \
+lz4/lz4.c lz4/lz4.h \
 utf8.c utf8.h \
 win.c win.h \
 utils.c utils.h \
diff --git a/src/bin/lz4/README b/src/bin/lz4/README
new file mode 100644
index 00000000..718f7737
--- /dev/null
+++ b/src/bin/lz4/README
@@ -0,0 +1,7 @@
+This is the lz4 tree copied in from:
+  http://lz4.googlecode.com/svn/trunk
+by:
+  yann.collet.73@gmail.com
+Copyright/licensing info is in the source files here.
+
+This was from revision 84.
diff --git a/src/bin/lz4/lz4.c b/src/bin/lz4/lz4.c
new file mode 100644
index 00000000..a651748a
--- /dev/null
+++ b/src/bin/lz4/lz4.c
@@ -0,0 +1,861 @@
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2012, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+//**************************************
+// Tuning parameters
+//**************************************
+// MEMORY_USAGE :
+// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+// Increasing memory usage improves compression ratio +// Reduced memory usage can improve speed, due to cache effect +// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache +#define MEMORY_USAGE 14 + +// NOTCOMPRESSIBLE_DETECTIONLEVEL : +// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" +// This may decrease compression ratio dramatically, but will be faster on incompressible data +// Increasing this value will make the algorithm search more before declaring a segment "incompressible" +// This could improve compression a bit, but will be slower on incompressible data +// The default value (6) is recommended +#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 + +// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : +// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. +// You can set this option to 1 in situations where data will remain within closed environment +// This option is useless on Little_Endian CPU (such as x86) +//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 + + + +//************************************** +// CPU Feature Detection +//************************************** +// 32 or 64 bits ? +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +// Little Endian or Big Endian ? +// Note : overwrite the below #define if you know your architecture endianess +#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) ) +# define LZ4_BIG_ENDIAN 1 +#else +// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. +#endif + +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
+// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected +// If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +// Define this parameter if your target system or compiler does not support hardware bit count +#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count +# define LZ4_FORCE_SW_BITCOUNT +#endif + + +//************************************** +// Compiler Options +//************************************** +#if __STDC_VERSION__ >= 199901L // C99 +/* "restrict" is a known keyword */ +#else +# define restrict // Disable restrict +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifdef _MSC_VER // Visual Studio +# include // For Visual 2005 +# if LZ4_ARCH64 // 64-bit +# pragma intrinsic(_BitScanForward64) // For Visual 2005 +# pragma intrinsic(_BitScanReverse64) // For Visual 2005 +# else +# pragma intrinsic(_BitScanForward) // For Visual 2005 +# pragma intrinsic(_BitScanReverse) // For Visual 2005 +# endif +#endif + +#ifdef _MSC_VER +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +//************************************** +// Includes +//************************************** +#include // for malloc +#include // for memset +#include "lz4.h" + + +//************************************** +// Basic Types +//************************************** +#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively +# define BYTE unsigned __int8 +# define U16 unsigned __int16 +# define U32 unsigned __int32 +# define S32 __int32 +# define U64 unsigned __int64 +#else +# include +# define BYTE uint8_t +# define U16 uint16_t +# define U32 uint32_t +# define S32 int32_t +# define U64 uint64_t +#endif + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +# pragma pack(push, 1) +#endif + +typedef struct _U16_S { U16 v; } U16_S; +typedef struct _U32_S { U32 v; } U32_S; +typedef struct _U64_S { U64 v; } U64_S; + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +# pragma pack(pop) +#endif + +#define A64(x) (((U64_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) + + +//************************************** +// Constants +//************************************** +#define MINMATCH 4 + +#define HASH_LOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASHTABLESIZE - 1) + +#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) +#define STACKLIMIT 13 +#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). 
+#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH+MINMATCH) +#define MINLENGTH (MFLIMIT+1) + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<> ((MINMATCH*8)-HASH_LOG)) +#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) +#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d>3); + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); + #else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; + #endif +#else + #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); + #else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; + #endif +#endif +} + +#else + +static inline int LZ4_NbCommonBytes (register U32 val) +{ +#if defined(LZ4_BIG_ENDIAN) + #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); + #else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; + #endif +#else + #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward( &r, val ); + return (int)(r>>3); + #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); + #else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + #endif +#endif +} + +#endif + + + +//****************************** +// Compression functions +//****************************** + +// LZ4_compressCtx : +// ----------------- +// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. +// If it cannot achieve it, compression will stop, and result of the function will be zero. 
+// return : the number of bytes written in buffer 'dest', or 0 if the compression fails + +static inline int LZ4_compressCtx(void** ctx, + const char* source, + char* dest, + int isize, + int maxOutputSize) +{ +#if HEAPMODE + struct refTables *srt = (struct refTables *) (*ctx); + HTYPE* HashTable; +#else + HTYPE HashTable[HASHTABLESIZE] = {0}; +#endif + + const BYTE* ip = (BYTE*) source; + INITBASE(base); + const BYTE* anchor = ip; + const BYTE* const iend = ip + isize; + const BYTE* const mflimit = iend - MFLIMIT; +#define matchlimit (iend - LASTLITERALS) + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + + // Init + if (isizehashTable); + memset((void*)HashTable, 0, sizeof(srt->hashTable)); +#else + (void) ctx; +#endif + + + // First Byte + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; forwardH = LZ4_HASH_VALUE(ip); + + // Main Loop + for ( ; ; ) + { + int findMatchAttempts = (1U << skipStrength) + 3; + const BYTE* forwardIp = ip; + const BYTE* ref; + BYTE* token; + + // Find a match + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_HASH_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = ip - base; + + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + // Catch up + while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit +#ifdef _MSC_VER + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<254) + { + do { *op++ = 255; len -= 255; } while (len>254); + *op++ = (BYTE)len; + memcpy(op, anchor, length); + op += length; + goto _next_match; + } + else + *op++ = (BYTE)len; + } + else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + else *token = (length<>8) > oend) return 0; // Check output limit + if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } + else *token += len; + + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } + + // Fill table + HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + // Test next position + ref = base + HashTable[LZ4_HASH_VALUE(ip)]; + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } + + // Prepare next loop + anchor = ip++; + forwardH = LZ4_HASH_VALUE(ip); + } + +_last_literals: + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (lastRun<> ((MINMATCH*8)-HASHLOG64K)) +#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) +static inline int LZ4_compress64kCtx(void** ctx, + const char* source, + char* dest, + int isize, + int maxOutputSize) +{ +#if HEAPMODE + struct refTables *srt = (struct refTables *) (*ctx); + U16* HashTable; +#else + U16 HashTable[HASH64KTABLESIZE] = {0}; +#endif + + const BYTE* ip = (BYTE*) 
source; + const BYTE* anchor = ip; + const BYTE* const base = ip; + const BYTE* const iend = ip + isize; + const BYTE* const mflimit = iend - MFLIMIT; +#define matchlimit (iend - LASTLITERALS) + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + + // Init + if (isizehashTable); + memset((void*)HashTable, 0, sizeof(srt->hashTable)); +#else + (void) ctx; +#endif + + + // First Byte + ip++; forwardH = LZ4_HASH64K_VALUE(ip); + + // Main Loop + for ( ; ; ) + { + int findMatchAttempts = (1U << skipStrength) + 3; + const BYTE* forwardIp = ip; + const BYTE* ref; + BYTE* token; + + // Find a match + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_HASH64K_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = (U16)(ip - base); + + } while (A32(ref) != A32(ip)); + + // Catch up + while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit +#ifdef _MSC_VER + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<254) + { + do { *op++ = 255; len -= 255; } while (len>254); + *op++ = (BYTE)len; + memcpy(op, anchor, length); + op += length; + goto _next_match; + } + else + *op++ = (BYTE)len; + } + else *token = (length<=(int)RUN_MASK) { *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } + else *token = (length<>8) > oend) return 0; // Check output limit + if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } + else *token += len; + + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } + + // Fill table + HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base); + + // Test next position + ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; + HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base); + if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } + + // Prepare next loop + anchor = ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0; + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (lastRun<>ML_BITS)) == RUN_MASK) { for (;(len=*ip++)==255;length+=255){} length += len; } + + // copy literals + cpy = op+length; + if unlikely(cpy>oend-COPYLENGTH) + { + if (cpy != oend) goto _output_error; // Error : we must necessarily stand at EOF + memcpy(op, ip, length); + ip += length; + break; // EOF + } + LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if (ref < (BYTE* const)dest) goto _output_error; // Error : offset create reference outside destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } + + // copy repeated sequence + if unlikely(op-refoend-COPYLENGTH) + { + if (cpy > oend) goto _output_error; // Error : request to write beyond destination buffer + LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + 
while(op>ML_BITS)) == RUN_MASK) { int s=255; while ((ipoend-COPYLENGTH) || (ip+length>iend-COPYLENGTH)) + { + if (cpy > oend) goto _output_error; // Error : writes beyond output buffer + if (ip+length != iend) goto _output_error; // Error : LZ4 format requires to consume all input at this stage + memcpy(op, ip, length); + op += length; + ip = iend; + break; // Necessarily EOF, due to parsing restrictions + } + LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if (ref < (BYTE* const)dest) goto _output_error; // Error : offset creates reference outside of destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) { while (ipoend-COPYLENGTH) + { + if (cpy > oend) goto _output_error; // Error : request to write outside of destination buffer + LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + while(opbackmax; i++) { - if (ty->back[i]) termpty_save_free(ty->back[i]); + if (ty->back[i]) + { + termpty_save_free(ty->back[i]); + ty->back[i] = NULL; + } } free(ty->back); + ty->back = NULL; } if (ty->screen) free(ty->screen); if (ty->screen2) free(ty->screen2); diff --git a/src/bin/termpty.h b/src/bin/termpty.h index 7d8eb622..9c172fd7 100644 --- a/src/bin/termpty.h +++ b/src/bin/termpty.h @@ -1,10 +1,11 @@ -typedef struct _Termpty Termpty; -typedef struct _Termcell Termcell; -typedef struct _Termatt Termatt; -typedef struct _Termstate Termstate; -typedef struct _Termsave Termsave; -typedef struct _Termblock Termblock; -typedef struct _Termexp Termexp; +typedef struct _Termpty Termpty; +typedef struct _Termcell Termcell; +typedef struct _Termatt Termatt; +typedef struct _Termstate Termstate; +typedef struct _Termsave Termsave; +typedef struct _Termsavecomp Termsavecomp; +typedef struct _Termblock Termblock; +typedef struct _Termexp Termexp; #define COL_DEF 0 #define COL_BLACK 1 @@ -136,8 +137,20 @@ struct _Termcell struct _Termsave { - int w; - Termcell cell[1]; + unsigned int comp : 1; + unsigned int z : 1; + unsigned int gen : 8; + unsigned int w : 22; + Termcell cell[1]; +}; + +struct _Termsavecomp +{ + unsigned int comp : 1; + unsigned int z : 1; + unsigned int gen : 8; + unsigned int w : 22; // compressed size in bytes + unsigned int wout; // output width in Termcells }; struct _Termblock diff --git a/src/bin/termptyops.c b/src/bin/termptyops.c index 22abb9aa..624ab9df 100644 --- a/src/bin/termptyops.c +++ b/src/bin/termptyops.c @@ -44,9 +44,11 @@ termpty_text_save_top(Termpty *ty, Termcell *cells, ssize_t w_max) if (!ty->back) ty->back = calloc(1, sizeof(Termsave *) * ty->backmax); if (ty->back[ty->backpos]) { - termpty_cell_fill(ty, NULL, ty->back[ty->backpos]->cell, - ty->back[ty->backpos]->w); - termpty_save_free(ty->back[ty->backpos]); + Termsave *ts2; + + ts2 = termpty_save_extract(ty->back[ty->backpos]); + termpty_save_free(ts2); + ty->back[ty->backpos] = NULL; } ty->back[ty->backpos] = ts; ty->backpos++; diff --git a/src/bin/termptysave.c b/src/bin/termptysave.c index 0c0a591c..b409b0b3 100644 --- a/src/bin/termptysave.c +++ b/src/bin/termptysave.c @@ -2,25 +2,322 @@ #include #include "termpty.h" #include "termptysave.h" +#include "lz4/lz4.h" +#include +#define MEM_PAGE_SIZE 4096 +#define MEM_ALLOC_ALIGN 16 +#define MEM_BLOCK_PAGES 32 +#define MEM_BLOCKS 1024 + +typedef struct _Alloc Alloc; + +struct _Alloc +{ + int size, last, count; + short slot; + unsigned char gen; + unsigned char __pad; +}; + +static unsigned char cur_gen = 0; +static Alloc *alloc[MEM_BLOCKS] = { 0 }; + +static void * 
+_alloc_new(int size, unsigned char gen)
+{
+   Alloc *al;
+   unsigned char *ptr;
+   int newsize, i, firstnull = -1;
+
+   // allocations are sized up to the nearest alloc alignment boundary
+   newsize = MEM_ALLOC_ALIGN * ((size + MEM_ALLOC_ALIGN - 1) / MEM_ALLOC_ALIGN);
+   for (i = 0; i < MEM_BLOCKS; i++)
+     {
+        if (!alloc[i])
+          {
+             if (firstnull < 0) firstnull = i;
+             continue;
+          }
+        // if the generation count matches
+        if (alloc[i]->gen == gen)
+          {
+             // if there is space in the block
+             if ((alloc[i]->size - alloc[i]->last) >= newsize)
+               {
+                  ptr = (unsigned char *)alloc[i];
+                  ptr += alloc[i]->last;
+                  alloc[i]->last += newsize;
+                  alloc[i]->count++;
+                  return ptr;
+               }
+          }
+     }
+   // out of slots for new blocks - no null slots left
+   if (firstnull < 0) return NULL;
+
+   // so allocate a new block
+   size = MEM_BLOCK_PAGES * MEM_PAGE_SIZE;
+   // round the block size up to a multiple of the page size
+   size = MEM_PAGE_SIZE * ((size + MEM_PAGE_SIZE - 1) / MEM_PAGE_SIZE);
+   // get mmapped anonymous memory so when freed it goes back to the system
+   ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+   if (ptr == MAP_FAILED) return NULL;
+
+   // note - we SHOULD memset to 0, but we assume anonymous mmap gives 0 pages
+   //memset(ptr, 0, newsize);
+
+   al = (Alloc *)ptr;
+   al->size = size;
+   al->last = sizeof(Alloc) + newsize;
+   al->count = 1;
+   al->slot = firstnull;
+   al->gen = gen;
+   alloc[al->slot] = al;
+   ptr = (unsigned char *)al;
+   ptr += sizeof(Alloc);
+   return ptr;
+}
+
+static void
+_alloc_free(Alloc *al)
+{
+   al->count--;
+   if (al->count > 0) return;
+   alloc[al->slot] = NULL;
+   munmap(al, al->size);
+}
+
+static Alloc *
+_alloc_find(void *mem)
+{
+   unsigned char *memptr = mem;
+   int i;
+
+   for (i = 0; i < MEM_BLOCKS; i++)
+     {
+        unsigned char *ptr;
+
+        ptr = (unsigned char *)alloc[i];
+        if (!ptr) continue;
+        if (memptr < ptr) continue;
+        if ((memptr - ptr) > 0x0fffffff) continue;
+        if (((size_t)memptr - (size_t)ptr) < (size_t)(alloc[i]->size))
+          return alloc[i];
+     }
+   return NULL;
+}
+
+static void *
+_mem_new(int size)
+{
+   void *ptr;
+
+   if (!size) return NULL;
+   ptr = _alloc_new(size, cur_gen);
+   return ptr;
+}
+
+static void
+_mem_free(void *ptr)
+{
+   Alloc *al;
+
+   if (!ptr) return;
+   al = _alloc_find(ptr);
+   if (!al)
+     {
+        ERR("Cannot find %p in alloc blocks", ptr);
+        return;
+     }
+   _alloc_free(al);
+}
+
+static void
+_mem_defrag(void)
+{
+   int i, j = 0;
+   Alloc *alloc2[MEM_BLOCKS];
+
+   for (i = 0; i < MEM_BLOCKS; i++)
+     {
+        if (alloc[i])
+          {
+//             printf("block %i @ %i [%i/%i] # %i\n",
+//                    j, alloc[i]->gen, alloc[i]->last, alloc[i]->size, alloc[i]->count);
+             alloc2[j] = alloc[i];
+             alloc2[j]->slot = j;
+             j++;
+          }
+     }
+   // XXX: quicksort blocks with most space at start
+   for (i = 0; i < j; i++) alloc[i] = alloc2[i];
+   for (; i < MEM_BLOCKS; i++) alloc[i] = NULL;
+}
+
+static void
+_mem_gen_next(void)
+{
+   cur_gen++;
+}
+
+static unsigned char
+_mem_gen_get(void)
+{
+   return cur_gen;
+}
+
 static int ts_comp = 0;
 static int ts_uncomp = 0;
+static int ts_freeops = 0;
+static int ts_compfreeze = 0;
 static int freeze = 0;
 static Eina_List *ptys = NULL;
+static Ecore_Idler *idler = NULL;
+static Ecore_Timer *timer = NULL;
+
+static Termsave *
+_save_comp(Termsave *ts)
+{
+   Termsave *ts2;
+   Termsavecomp *tsc;
+
+   // already compacted
+   if (ts->comp) return ts;
+   // make new allocation for the new generation
+   ts_compfreeze++;
+   if (!ts->z)
+     {
+        int bytes;
+        char *buf;
+
+        buf = alloca(LZ4_compressBound(ts->w * sizeof(Termcell)));
+        bytes = LZ4_compress((char *)(&(ts->cell[0])), buf,
+                             ts->w * sizeof(Termcell));
+        tsc = _mem_new(sizeof(Termsavecomp) + bytes);
+        if (!tsc)
+          {
+             ERR("Big problem. Can't allocate backscroll compress buffer");
+             ts2 = ts;
+             goto done;
+          }
+        tsc->comp = 1;
+        tsc->z = 1;
+        tsc->gen = _mem_gen_get();
+        tsc->w = bytes;
+        tsc->wout = ts->w;
+        memcpy(((char *)tsc) + sizeof(Termsavecomp), buf, bytes);
+        ts2 = (Termsave *)tsc;
+     }
+   else
+     {
+        ts2 = ts;
+        tsc = (Termsavecomp *)ts;
+        ts2 = _mem_new(sizeof(Termsavecomp) + tsc->w);
+        if (!ts2)
+          {
+             ERR("Big problem. Can't allocate backscroll compress/copy buffer");
+             ts2 = ts;
+             goto done;
+          }
+        memcpy(ts2, ts, sizeof(Termsavecomp) + tsc->w);
+        ts2->gen = _mem_gen_get();
+        ts2->comp = 1;
+     }
+   termpty_save_free(ts);
+done:
+   ts_compfreeze--;
+   return ts2;
+}
+
+static void
+_walk_pty(Termpty *ty)
+{
+   int i;
+//   int c0 = 0, c1 = 0;
+
+   if (!ty->back) return;
+   for (i = 0; i < ty->backmax; i++)
+     {
+        Termsavecomp *tsc = (Termsavecomp *)ty->back[i];
+
+        if (tsc)
+          {
+             ty->back[i] = _save_comp(ty->back[i]);
+             tsc = (Termsavecomp *)ty->back[i];
+             if (tsc->comp) ts_comp++;
+             else ts_uncomp++;
+//             c0 += tsc->w;
+//             c1 += tsc->wout * sizeof(Termcell);
+          }
+     }
+//   printf("compress ratio: %1.3f\n", (double)c0 / (double)c1);
+}
+
+static Eina_Bool
+_idler(void *data __UNUSED__)
+{
+   Eina_List *l;
+   Termpty *ty;
+//   double t0, t;
+
+   _mem_gen_next();
+
+//   t0 = ecore_time_get();
+   // start afresh and recount comp/uncomp
+   ts_comp = 0;
+   ts_uncomp = 0;
+   EINA_LIST_FOREACH(ptys, l, ty)
+     {
+        _walk_pty(ty);
+     }
+//   t = ecore_time_get();
+//   printf("comp/uncomp %i/%i time spent %1.5f\n", ts_comp, ts_uncomp, t - t0);
+   _mem_defrag();
+   ts_freeops = 0;
+
+   _mem_gen_next();
+
+   idler = NULL;
+   return EINA_FALSE;
+}
+
+static Eina_Bool
+_timer(void *data __UNUSED__)
+{
+   if (!idler) idler = ecore_idler_add(_idler, NULL);
+   timer = NULL;
+   return EINA_FALSE;
+}
 
 static void
 _check_compressor(void)
 {
-   if (ts_uncomp > 1024)
+   if (idler) return;
+   if ((ts_uncomp > 256) || (ts_freeops > 256))
      {
-        // XXX: if no compressor start one if not frozen
+        if (timer) ecore_timer_reset(timer);
+        else timer = ecore_timer_add(0.2, _timer, NULL);
      }
 }
 
 void
 termpty_save_freeze(void)
 {
-   // XXX: suspend compressor
+   // XXX: suspend compressor - this probably should be in a thread but right
+   // now it'll be fine here
    freeze++;
+   if (idler)
+     {
+        ecore_idler_del(idler);
+        idler = NULL;
+     }
+   if (timer)
+     {
+        ecore_timer_del(timer);
+        timer = NULL;
+     }
 }
 
 void
@@ -50,10 +347,36 @@ termpty_save_unregister(Termpty *ty)
 Termsave *
 termpty_save_extract(Termsave *ts)
 {
-   // XXX: decompress a Termsave struct from our save store using input ptr as
-   // handle to find it
    if (!ts) return NULL;
-   // XXX: if was compressed ts_comp--; ts_uncomp++;
+   if (ts->z)
+     {
+        Termsavecomp *tsc = (Termsavecomp *)ts;
+        Termsave *ts2;
+        char *buf;
+        int bytes;
+
+        ts2 = _mem_new(sizeof(Termsave) + ((tsc->wout - 1) * sizeof(Termcell)));
+        if (!ts2) return NULL;
+        ts2->gen = _mem_gen_get();
+        ts2->w = tsc->wout;
+        buf = ((char *)tsc) + sizeof(Termsavecomp);
+        bytes = LZ4_uncompress(buf, (char *)(&(ts2->cell[0])),
+                               tsc->wout * sizeof(Termcell));
+        if (bytes < 0)
+          {
+             memset(&(ts2->cell[0]), 0, tsc->wout * sizeof(Termcell));
+//             ERR("Decompress problem in row at byte %i", -bytes);
+          }
+        if (ts->comp) ts_comp--;
+        else ts_uncomp--;
+        ts_uncomp++;
+        ts_freeops++;
+        ts_compfreeze++;
+        _mem_free(ts);
+        ts_compfreeze--;
+        _check_compressor();
+        return ts2;
+     }
    _check_compressor();
    return ts;
 }
@@ -61,10 +384,11 @@ termpty_save_extract(Termsave *ts)
 Termsave *
 termpty_save_new(int w)
 {
-   Termsave *ts = calloc(1, sizeof(Termsave) + (w - 1) * sizeof(Termcell));
+   Termsave *ts = _mem_new(sizeof(Termsave) + ((w - 1) * sizeof(Termcell)));
    if (!ts) return NULL;
+   ts->gen = _mem_gen_get();
    ts->w = w;
-   ts_uncomp++;
+   if (!ts_compfreeze) ts_uncomp++;
    _check_compressor();
    return ts;
 }
@@ -73,9 +397,12 @@ void
 termpty_save_free(Termsave *ts)
 {
    if (!ts) return;
-   // XXX: if compressed mark region as free, if not then free ts
-   // XXX: if compresses ts_comp--; else ts_uncomp--;
-   ts_uncomp--;
+   if (!ts_compfreeze)
+     {
+        if (ts->comp) ts_comp--;
+        else ts_uncomp--;
+        ts_freeops++;
+     }
+   _mem_free(ts);
    _check_compressor();
-   free(ts);
 }
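
Sketch 1: the shared row header. Termsave and Termsavecomp in termpty.h
above deliberately begin with the same bitfields (comp:1, z:1, gen:8, w:22,
exactly one 32-bit word), so a stored row can be classified before deciding
how to read its body. The helper below is hypothetical and not part of the
patch; it only illustrates the convention the structs encode.

#include "termpty.h"   // Termsave, Termsavecomp

// Hypothetical helper: the z bit is valid through either struct type,
// so it can be tested via a Termsave pointer before casting.
static int
row_cell_count(const Termsave *ts)
{
   if (!ts->z) return ts->w; // raw row: w counts Termcells in cell[]
   // compressed row: w counts LZ4 payload bytes; wout is the cell count
   return ((const Termsavecomp *)ts)->wout;
}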
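
Sketch 2: the LZ4 round trip that _save_comp() and termpty_save_extract()
perform, using the vendored LZ4 r84 API declared in lz4/lz4.h.
row_compress() and row_extract() are hypothetical helpers, and plain
malloc() stands in for the patch's _mem_new() block allocator.

#include <stdlib.h>
#include <string.h>
#include <alloca.h>
#include "termpty.h"   // Termcell
#include "lz4/lz4.h"   // LZ4_compress, LZ4_uncompress, LZ4_compressBound

static char *
row_compress(Termcell *cells, int w, int *out_bytes)
{
   int insize = w * sizeof(Termcell);
   char *buf = alloca(LZ4_compressBound(insize)); // worst-case scratch
   int bytes = LZ4_compress((char *)cells, buf, insize);
   char *out = malloc(bytes);

   if (!out) return NULL;
   memcpy(out, buf, bytes); // keep only the bytes actually produced
   *out_bytes = bytes;
   return out;
}

static int
row_extract(const char *comp, Termcell *cells, int w)
{
   // LZ4_uncompress() takes the DECOMPRESSED size and returns how many
   // compressed bytes it consumed; a negative result means corrupt input
   return LZ4_uncompress(comp, (char *)cells, w * sizeof(Termcell));
}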
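
Sketch 3: the generation-based block allocation idea behind _alloc_new()
and _mem_defrag(). This is a minimal sketch with hypothetical names
(Block, block_alloc, block_free), assuming Linux-style anonymous mmap:
rows are bump-allocated into mmapped blocks stamped with a generation
counter, each compression pass re-allocates surviving rows under a new
generation, and a block is munmapped whole once its last row is freed,
which is how freeing backscroll hands real pages back to the kernel.

#define _GNU_SOURCE   // assumption: MAP_ANONYMOUS needs this on some libcs
#include <stddef.h>
#include <sys/mman.h>

#define BLOCK_SIZE (32 * 4096) // mirrors MEM_BLOCK_PAGES * MEM_PAGE_SIZE

typedef struct _Block
{
   size_t        used; // bump offset of the first free byte
   int           live; // live allocations still inside this block
   unsigned char gen;  // generation this block was filled in
} Block;

static void *
block_alloc(Block **cur, size_t size, unsigned char gen)
{
   void *ptr;

   size = (size + 15) & ~(size_t)15; // round up to 16-byte alignment
   if (size > (BLOCK_SIZE - sizeof(Block))) return NULL;
   if ((!*cur) || ((*cur)->gen != gen) ||
       (((*cur)->used + size) > BLOCK_SIZE))
     {
        // no open block for this generation - map a fresh zeroed one
        void *p = mmap(NULL, BLOCK_SIZE, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) return NULL;
        *cur = p;
        (*cur)->used = sizeof(Block);
        (*cur)->live = 0;
        (*cur)->gen = gen;
     }
   ptr = (char *)(*cur) + (*cur)->used;
   (*cur)->used += size;
   (*cur)->live++;
   return ptr;
}

static void
block_free(Block *b)
{
   // the last free returns the whole block to the kernel in one munmap();
   // compaction re-allocates survivors under a new generation, so old
   // blocks drain and vanish instead of fragmenting
   if (--b->live <= 0) munmap(b, BLOCK_SIZE);
}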