Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

s2: Clean up decoder #312

Merged
merged 1 commit into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions s2/cmd/s2d/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package main

import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime/debug"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -76,9 +78,52 @@ Options:`)
}

*quiet = *quiet || *stdout
allFiles := files
for i := 0; i < *bench; i++ {
files = append(files, allFiles...)

// Benchmark mode: when *bench is greater than zero, every accepted input file
// is read fully into memory and decompressed *bench times, with timing
// printed per run. The process then exits with status 0, so the regular
// per-file loop that follows this block is never reached in this mode.
if *bench > 0 {
// Set the garbage collection target percentage to 10 for the benchmark run.
debug.SetGCPercent(10)
for _, filename := range files {
// Only files ending in ".s2" or ".snappy" are benchmarked. The two empty
// cases accept the file (Go cases do not fall through); anything else is
// reported and skipped. Note the "Skipping" line is printed even when
// *quiet is set.
switch {
case strings.HasSuffix(filename, ".s2"):
case strings.HasSuffix(filename, ".snappy"):
default:
fmt.Println("Skipping", filename)
continue
}

// The per-file work runs in an immediately-invoked function literal.
func() {
if !*quiet {
fmt.Print("Reading ", filename, "...")
}
// Load the whole input file into b up front, so the timed section below
// reads from memory rather than from the file.
// exitErr is defined outside this view; presumably it terminates the
// program on a non-nil error — confirm.
file, err := os.Open(filename)
exitErr(err)
finfo, err := file.Stat()
exitErr(err)
b := make([]byte, finfo.Size())
_, err = io.ReadFull(file, b)
exitErr(err)
file.Close()

// Decompress the same in-memory input *bench times.
for i := 0; i < *bench; i++ {
if !*quiet {
fmt.Print("\nDecompressing...")
}
// r is declared outside this view (presumably the s2 Reader — confirm).
// It is reset onto a fresh buffer over b before the clock starts.
r.Reset(bytes.NewBuffer(b))
start := time.Now()
// The decompressed bytes are discarded; only their count is kept.
output, err := io.Copy(ioutil.Discard, r)
exitErr(err)
if !*quiet {
elapsed := time.Since(start)
// ms is the elapsed time rounded to whole milliseconds, for display only.
ms := elapsed.Round(time.Millisecond)
// Throughput: decompressed bytes in units of 1024*1024 bytes, per second.
mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
// Decompressed size as a percentage of the compressed input size.
pct := float64(output) * 100 / float64(len(b))
fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec)
}
}
fmt.Println("")
}()
}
// Benchmarking is complete; do not continue into normal processing.
os.Exit(0)
}

for _, filename := range files {
Expand Down
14 changes: 4 additions & 10 deletions s2/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ var (
ErrTooLarge = errors.New("s2: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("s2: unsupported input")

errUnsupportedLiteralLength = errors.New("s2: unsupported literal length")
)

// DecodedLen returns the length of the decoded block.
Expand All @@ -46,8 +44,7 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) {
}

const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
decodeErrCodeCorrupt = 1
)

// Decode returns the decoded form of src. The returned slice may be a sub-
Expand All @@ -65,13 +62,10 @@ func Decode(dst, src []byte) ([]byte, error) {
} else {
dst = make([]byte, dLen)
}
switch s2Decode(dst, src[s:]) {
case 0:
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
if s2Decode(dst, src[s:]) != 0 {
return nil, ErrCorrupt
}
return nil, ErrCorrupt
return dst, nil
}

// NewReader returns a new Reader that decompresses from r, using the framing
Expand Down
3 changes: 0 additions & 3 deletions s2/decode_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ func s2Decode(dst, src []byte) int {
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length <= 0 {
return decodeErrCodeUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return decodeErrCodeCorrupt
}
Expand Down
40 changes: 14 additions & 26 deletions s2/s2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1328,16 +1328,6 @@ func benchDecode(b *testing.B, src []byte) {
}
}

// benchDecodeBetter measures Decode throughput on input produced by
// EncodeBetter.
//
// src is compressed once, before the timer is reset; every iteration then
// decodes that encoded block, passing src as the destination buffer.
// Throughput is reported relative to the uncompressed size, len(src).
// A decode error aborts the benchmark instead of being silently timed.
func benchDecodeBetter(b *testing.B, src []byte) {
	encoded := EncodeBetter(nil, src)
	// Bandwidth is in amount of uncompressed data.
	b.SetBytes(int64(len(src)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := Decode(src, encoded); err != nil {
			b.Fatal(err)
		}
	}
}

func benchEncode(b *testing.B, src []byte) {
// Bandwidth is in amount of uncompressed data.
b.SetBytes(int64(len(src)))
Expand Down Expand Up @@ -1386,29 +1376,27 @@ func expand(src []byte, n int) []byte {
return dst
}

func benchWords(b *testing.B, n int, decode bool) {
// Note: the file is OS-language dependent so the resulting values are not
// directly comparable for non-US-English OS installations.
data := expand(readFile(b, "/usr/share/dict/words"), n)
func benchTwain(b *testing.B, n int, decode bool) {
data := expand(readFile(b, "../testdata/Mark.Twain-Tom.Sawyer.txt"), n)
if decode {
benchDecode(b, data)
} else {
benchEncode(b, data)
}
}

// Words benchmarks: each function calls benchWords with n ranging from 1e1
// to 1e6. The Decode variants pass decode=true and the Encode variants pass
// decode=false.
func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) }
func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) }
func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) }
func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) }
func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) }
func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) }
func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) }
func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) }
func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) }
func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) }
func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) }
func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) }
// Twain benchmarks: each function calls benchTwain with n ranging from 1e1
// to 1e6. The Decode variants pass decode=true and the Encode variants pass
// decode=false.
func BenchmarkTwainDecode1e1(b *testing.B) { benchTwain(b, 1e1, true) }
func BenchmarkTwainDecode1e2(b *testing.B) { benchTwain(b, 1e2, true) }
func BenchmarkTwainDecode1e3(b *testing.B) { benchTwain(b, 1e3, true) }
func BenchmarkTwainDecode1e4(b *testing.B) { benchTwain(b, 1e4, true) }
func BenchmarkTwainDecode1e5(b *testing.B) { benchTwain(b, 1e5, true) }
func BenchmarkTwainDecode1e6(b *testing.B) { benchTwain(b, 1e6, true) }
func BenchmarkTwainEncode1e1(b *testing.B) { benchTwain(b, 1e1, false) }
func BenchmarkTwainEncode1e2(b *testing.B) { benchTwain(b, 1e2, false) }
func BenchmarkTwainEncode1e3(b *testing.B) { benchTwain(b, 1e3, false) }
func BenchmarkTwainEncode1e4(b *testing.B) { benchTwain(b, 1e4, false) }
func BenchmarkTwainEncode1e5(b *testing.B) { benchTwain(b, 1e5, false) }
func BenchmarkTwainEncode1e6(b *testing.B) { benchTwain(b, 1e6, false) }

func BenchmarkRandomEncodeBlock1MB(b *testing.B) {
rng := rand.New(rand.NewSource(1))
Expand Down