This commit implements a massive refactor of the repository and moves the build system to Mage (magefile.org), which should allow seamless building across multiple platforms.
399 lines
8.1 KiB
Go
399 lines
8.1 KiB
Go
// Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package lzma
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
)
|
|
|
|
// Size limits for the payload of a single LZMA2 chunk.
const (
	// maxCompressed is the maximum size of compressed data in a chunk.
	maxCompressed = 0x10000
	// maxUncompressed is the maximum size of uncompressed data in a
	// chunk.
	maxUncompressed = 0x200000
)
|
|
|
|
// chunkType identifies the kind of an LZMA2 chunk. The value is an
// internal representation only; it is not the encoding used in the
// LZMA2 chunk header byte.
type chunkType byte

// Possible values for the chunk type.
const (
	cEOS  chunkType = iota // end of stream
	cUD                    // uncompressed; reset dictionary
	cU                     // uncompressed; no reset of dictionary
	cL                     // LZMA compressed; no reset
	cLR                    // LZMA compressed; reset state
	cLRN                   // LZMA compressed; reset state; new property value
	cLRND                  // LZMA compressed; reset state; new property value; reset dictionary
)
|
|
|
|
// chunkTypeStrings provide a string representation for the chunk types.
|
|
var chunkTypeStrings = [...]string{
|
|
cEOS: "EOS",
|
|
cU: "U",
|
|
cUD: "UD",
|
|
cL: "L",
|
|
cLR: "LR",
|
|
cLRN: "LRN",
|
|
cLRND: "LRND",
|
|
}
|
|
|
|
// String returns a string representation of the chunk type.
|
|
func (c chunkType) String() string {
|
|
if !(cEOS <= c && c <= cLRND) {
|
|
return "unknown"
|
|
}
|
|
return chunkTypeStrings[c]
|
|
}
|
|
|
|
// Encodings of the chunk types in the chunk header byte. For LZMA
// chunks the low five bits of the header byte additionally carry the
// high bits of the uncompressed size.
const (
	hEOS  = 0x00
	hUD   = 0x01
	hU    = 0x02
	hL    = 0x80             // bit 7 marks an LZMA compressed chunk
	hLR   = hL | 0x20        // bit 5
	hLRN  = hL | 0x40        // bit 6
	hLRND = hL | 0x40 | 0x20 // bits 6 and 5
)
|
|
|
|
// errHeaderByte indicates an unsupported value for the chunk header
|
|
// byte. These bytes starts the variable-length chunk header.
|
|
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
|
|
|
|
// headerChunkType converts the header byte into a chunk type. It
|
|
// ignores the uncompressed size bits in the chunk header byte.
|
|
func headerChunkType(h byte) (c chunkType, err error) {
|
|
if h&hL == 0 {
|
|
// no compression
|
|
switch h {
|
|
case hEOS:
|
|
c = cEOS
|
|
case hUD:
|
|
c = cUD
|
|
case hU:
|
|
c = cU
|
|
default:
|
|
return 0, errHeaderByte
|
|
}
|
|
return
|
|
}
|
|
switch h & hLRND {
|
|
case hL:
|
|
c = cL
|
|
case hLR:
|
|
c = cLR
|
|
case hLRN:
|
|
c = cLRN
|
|
case hLRND:
|
|
c = cLRND
|
|
default:
|
|
return 0, errHeaderByte
|
|
}
|
|
return
|
|
}
|
|
|
|
// uncompressedHeaderLen provides the length of an uncompressed header
|
|
const uncompressedHeaderLen = 3
|
|
|
|
// headerLen returns the length of the LZMA2 header for a given chunk
|
|
// type.
|
|
func headerLen(c chunkType) int {
|
|
switch c {
|
|
case cEOS:
|
|
return 1
|
|
case cU, cUD:
|
|
return uncompressedHeaderLen
|
|
case cL, cLR:
|
|
return 5
|
|
case cLRN, cLRND:
|
|
return 6
|
|
}
|
|
panic(fmt.Errorf("unsupported chunk type %d", c))
|
|
}
|
|
|
|
// chunkHeader represents the contents of a chunk header.
|
|
type chunkHeader struct {
|
|
ctype chunkType
|
|
uncompressed uint32
|
|
compressed uint16
|
|
props Properties
|
|
}
|
|
|
|
// String returns a string representation of the chunk header.
|
|
func (h *chunkHeader) String() string {
|
|
return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
|
|
h.compressed, &h.props)
|
|
}
|
|
|
|
// UnmarshalBinary reads the content of the chunk header from the data
|
|
// slice. The slice must have the correct length.
|
|
func (h *chunkHeader) UnmarshalBinary(data []byte) error {
|
|
if len(data) == 0 {
|
|
return errors.New("no data")
|
|
}
|
|
c, err := headerChunkType(data[0])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
n := headerLen(c)
|
|
if len(data) < n {
|
|
return errors.New("incomplete data")
|
|
}
|
|
if len(data) > n {
|
|
return errors.New("invalid data length")
|
|
}
|
|
|
|
*h = chunkHeader{ctype: c}
|
|
if c == cEOS {
|
|
return nil
|
|
}
|
|
|
|
h.uncompressed = uint32(uint16BE(data[1:3]))
|
|
if c <= cU {
|
|
return nil
|
|
}
|
|
h.uncompressed |= uint32(data[0]&^hLRND) << 16
|
|
|
|
h.compressed = uint16BE(data[3:5])
|
|
if c <= cLR {
|
|
return nil
|
|
}
|
|
|
|
h.props, err = PropertiesForCode(data[5])
|
|
return err
|
|
}
|
|
|
|
// MarshalBinary encodes the chunk header value. The function checks
|
|
// whether the content of the chunk header is correct.
|
|
func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
|
|
if h.ctype > cLRND {
|
|
return nil, errors.New("invalid chunk type")
|
|
}
|
|
if err = h.props.verify(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
data = make([]byte, headerLen(h.ctype))
|
|
|
|
switch h.ctype {
|
|
case cEOS:
|
|
return data, nil
|
|
case cUD:
|
|
data[0] = hUD
|
|
case cU:
|
|
data[0] = hU
|
|
case cL:
|
|
data[0] = hL
|
|
case cLR:
|
|
data[0] = hLR
|
|
case cLRN:
|
|
data[0] = hLRN
|
|
case cLRND:
|
|
data[0] = hLRND
|
|
}
|
|
|
|
putUint16BE(data[1:3], uint16(h.uncompressed))
|
|
if h.ctype <= cU {
|
|
return data, nil
|
|
}
|
|
data[0] |= byte(h.uncompressed>>16) &^ hLRND
|
|
|
|
putUint16BE(data[3:5], h.compressed)
|
|
if h.ctype <= cLR {
|
|
return data, nil
|
|
}
|
|
|
|
data[5] = h.props.Code()
|
|
return data, nil
|
|
}
|
|
|
|
// readChunkHeader reads the chunk header from the IO reader.
|
|
func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
|
|
p := make([]byte, 1, 6)
|
|
if _, err = io.ReadFull(r, p); err != nil {
|
|
return
|
|
}
|
|
c, err := headerChunkType(p[0])
|
|
if err != nil {
|
|
return
|
|
}
|
|
p = p[:headerLen(c)]
|
|
if _, err = io.ReadFull(r, p[1:]); err != nil {
|
|
return
|
|
}
|
|
h = new(chunkHeader)
|
|
if err = h.UnmarshalBinary(p); err != nil {
|
|
return nil, err
|
|
}
|
|
return h, nil
|
|
}
|
|
|
|
// uint16BE decodes the first two bytes of p as a big-endian uint16.
// It panics if p has fewer than two bytes.
func uint16BE(p []byte) uint16 {
	hi, lo := uint16(p[0]), uint16(p[1])
	return hi<<8 | lo
}
|
|
|
|
// putUint16BE stores x in big-endian byte order into the first two
// bytes of p. It panics if p has fewer than two bytes.
func putUint16BE(p []byte, x uint16) {
	hi, lo := byte(x>>8), byte(x)
	p[0] = hi
	p[1] = lo
}
|
|
|
|
// chunkState is used to manage the state of the chunks. The states are
// represented by ASCII letters.
type chunkState byte

// start and stop define the initial and terminating state of the chunk
// state.
const (
	start chunkState = 'S'
	// stop carries the chunkState type explicitly. An untyped
	// declaration with its own value does not inherit the type of the
	// preceding constant and would be an untyped rune constant.
	stop chunkState = 'T'
)
|
|
|
|
// Errors reported by the chunk state handling.

// errChunkType is returned if a chunk type is not allowed in the
// current chunk state.
var errChunkType = errors.New("lzma: unexpected chunk type")

// errState is returned if the chunk state itself has an unknown value.
var errState = errors.New("lzma: wrong chunk state")
|
|
|
|
// next transitions state based on chunk type input
|
|
func (c *chunkState) next(ctype chunkType) error {
|
|
switch *c {
|
|
// start state
|
|
case 'S':
|
|
switch ctype {
|
|
case cEOS:
|
|
*c = 'T'
|
|
case cUD:
|
|
*c = 'R'
|
|
case cLRND:
|
|
*c = 'L'
|
|
default:
|
|
return errChunkType
|
|
}
|
|
// normal LZMA mode
|
|
case 'L':
|
|
switch ctype {
|
|
case cEOS:
|
|
*c = 'T'
|
|
case cUD:
|
|
*c = 'R'
|
|
case cU:
|
|
*c = 'U'
|
|
case cL, cLR, cLRN, cLRND:
|
|
break
|
|
default:
|
|
return errChunkType
|
|
}
|
|
// reset required
|
|
case 'R':
|
|
switch ctype {
|
|
case cEOS:
|
|
*c = 'T'
|
|
case cUD, cU:
|
|
break
|
|
case cLRN, cLRND:
|
|
*c = 'L'
|
|
default:
|
|
return errChunkType
|
|
}
|
|
// uncompressed
|
|
case 'U':
|
|
switch ctype {
|
|
case cEOS:
|
|
*c = 'T'
|
|
case cUD:
|
|
*c = 'R'
|
|
case cU:
|
|
break
|
|
case cL, cLR, cLRN, cLRND:
|
|
*c = 'L'
|
|
default:
|
|
return errChunkType
|
|
}
|
|
// terminal state
|
|
case 'T':
|
|
return errChunkType
|
|
default:
|
|
return errState
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// defaultChunkType returns the default chunk type for each chunk state.
|
|
func (c chunkState) defaultChunkType() chunkType {
|
|
switch c {
|
|
case 'S':
|
|
return cLRND
|
|
case 'L', 'U':
|
|
return cL
|
|
case 'R':
|
|
return cLRN
|
|
default:
|
|
// no error
|
|
return cEOS
|
|
}
|
|
}
|
|
|
|
// Limits of the LZMA2 dictionary capacity encoding.
const (
	// maxDictCap is the maximum dictionary capacity supported by the
	// LZMA2 dictionary capacity encoding.
	maxDictCap = 0xffffffff

	// maxDictCapCode is the maximum dictionary capacity code.
	maxDictCapCode = 40
)
|
|
|
|
// decodeDictCap decodes the dictionary capacity byte, but doesn't
// check whether the given byte is in the valid range. Bit 0 of c
// selects the mantissa 2 or 3; the five bits above it are added to 11
// to form the shift count.
func decodeDictCap(c byte) int64 {
	mantissa := int64(2 | c&1)
	exponent := 11 + (c>>1)&0x1f
	return mantissa << exponent
}
|
|
|
|
// DecodeDictCap decodes the encoded dictionary capacity. The function
|
|
// returns an error if the code is out of range.
|
|
func DecodeDictCap(c byte) (n int64, err error) {
|
|
if c >= maxDictCapCode {
|
|
if c == maxDictCapCode {
|
|
return maxDictCap, nil
|
|
}
|
|
return 0, errors.New("lzma: invalid dictionary size code")
|
|
}
|
|
return decodeDictCap(c), nil
|
|
}
|
|
|
|
// EncodeDictCap encodes a dictionary capacity. The function returns the
|
|
// code for the capacity that is greater or equal n. If n exceeds the
|
|
// maximum support dictionary capacity, the maximum value is returned.
|
|
func EncodeDictCap(n int64) byte {
|
|
a, b := byte(0), byte(40)
|
|
for a < b {
|
|
c := a + (b-a)>>1
|
|
m := decodeDictCap(c)
|
|
if n <= m {
|
|
if n == m {
|
|
return c
|
|
}
|
|
b = c
|
|
} else {
|
|
a = c + 1
|
|
}
|
|
}
|
|
return a
|
|
}
|