Move top level packages to internal.

This commit is contained in:
Jakob Borg
2014-09-22 21:42:11 +02:00
parent fbdbd722b1
commit 14817e31f6
174 changed files with 252 additions and 254 deletions

View File

@@ -0,0 +1,73 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"os"
"path/filepath"
"sync"
"github.com/syncthing/syncthing/internal/protocol"
)
// newParallelHasher starts the given number of worker goroutines. Each
// worker reads FileInfo values from inbox, hashes the corresponding file to
// fill in its Blocks, and sends the result on outbox. The outbox is closed
// once inbox has been closed and every worker has drained it.
func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo) {
	var running sync.WaitGroup
	running.Add(workers)

	worker := func() {
		defer running.Done()
		hashFile(dir, blockSize, outbox, inbox)
	}
	for n := 0; n < workers; n++ {
		go worker()
	}

	// Close the outbox only after the last worker has returned, so that
	// nobody sends on a closed channel.
	go func() {
		running.Wait()
		close(outbox)
	}()
}
// hashFile is the body of a single hasher worker. It consumes entries from
// inbox until the channel is closed. Directories and deleted entries are
// forwarded to outbox untouched; every other entry is opened relative to
// dir, hashed with Blocks, and forwarded with its Blocks field populated.
// Entries that cannot be opened, stat'ed or hashed are dropped (the error is
// only reported when debug logging is enabled).
func hashFile(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) {
	for entry := range inbox {
		// Nothing to hash for directories and deletions.
		if protocol.IsDirectory(entry.Flags) || protocol.IsDeleted(entry.Flags) {
			outbox <- entry
			continue
		}

		file, err := os.Open(filepath.Join(dir, entry.Name))
		if err != nil {
			if debug {
				l.Debugln("open:", err)
			}
			continue
		}

		stat, err := file.Stat()
		if err != nil {
			file.Close()
			if debug {
				l.Debugln("stat:", err)
			}
			continue
		}

		// The file size is only a capacity hint for the block list.
		hashed, err := Blocks(file, blockSize, stat.Size())
		file.Close()
		if err != nil {
			if debug {
				l.Debugln("hash error:", entry.Name, err)
			}
			continue
		}

		entry.Blocks = hashed
		outbox <- entry
	}
}

View File

@@ -0,0 +1,90 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"bytes"
"crypto/sha256"
"io"
"github.com/syncthing/syncthing/internal/protocol"
)
// StandardBlockSize is a block size of 128 KiB (128 * 1024 bytes).
const StandardBlockSize = 128 * 1024
// sha256OfNothing is the hash stored in the single zero-length block that
// Blocks reports for empty input.
var sha256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}
// Blocks returns the blockwise hash of the reader.
//
// The reader is consumed in chunks of blocksize bytes; each chunk yields one
// BlockInfo holding its offset, its size and its SHA-256 hash. The final
// block may be shorter than blocksize. Empty input yields a single
// zero-length block whose hash is sha256OfNothing.
//
// sizehint, when positive, is the expected total number of bytes and is only
// used to preallocate the result; it does not limit how much is read. Any
// read error aborts hashing and is returned with a nil block list.
func Blocks(r io.Reader, blocksize int, sizehint int64) ([]protocol.BlockInfo, error) {
	var blocks []protocol.BlockInfo
	if sizehint > 0 {
		// Round up so that a trailing partial block is accounted for;
		// truncating here would force a reallocation on the last append
		// for every file whose size is not a multiple of blocksize.
		bs := int64(blocksize)
		blocks = make([]protocol.BlockInfo, 0, int((sizehint+bs-1)/bs))
	}

	var offset int64
	hf := sha256.New()
	// A single limited reader is reused; its budget is reset per block.
	lr := &io.LimitedReader{R: r}
	for {
		lr.N = int64(blocksize)
		n, err := io.Copy(hf, lr)
		if err != nil {
			return nil, err
		}
		if n == 0 {
			// Nothing more to read.
			break
		}

		blocks = append(blocks, protocol.BlockInfo{
			Size:   uint32(n),
			Offset: offset,
			Hash:   hf.Sum(nil),
		})
		offset += n

		hf.Reset()
	}

	if len(blocks) == 0 {
		// Empty file
		blocks = append(blocks, protocol.BlockInfo{
			Offset: 0,
			Size:   0,
			Hash:   sha256OfNothing,
		})
	}

	return blocks, nil
}
// BlockDiff compares two block lists position by position and reports which
// blocks of tgt are already present in src (have) and which must be fetched
// to turn src into tgt (need). Both lists must have been produced with the
// same block size.
//
// As a side effect the Offset field of every tgt block is rewritten to the
// running sum of the preceding block sizes. An empty tgt yields two nil
// lists; an empty src with a non-empty tgt yields tgt itself as need.
func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
	if len(tgt) == 0 {
		// Nothing is wanted, so nothing is shared or missing.
		return nil, nil
	}

	// Recompute the offsets of the target blocks from their sizes.
	var pos int64
	for i := range tgt {
		tgt[i].Offset = pos
		pos += int64(tgt[i].Size)
	}

	if len(src) == 0 {
		// Copy the entire file
		return nil, tgt
	}

	for i, blk := range tgt {
		if i < len(src) && bytes.Equal(blk.Hash, src[i].Hash) {
			have = append(have, blk)
			continue
		}
		// Block is beyond the end of src or its content differs.
		need = append(need, blk)
	}

	return have, need
}

View File

@@ -0,0 +1,122 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"bytes"
"fmt"
"testing"
"github.com/syncthing/syncthing/internal/protocol"
)
// blocksTestData lists the inputs for TestBlocks: the data to hash, the
// block size to hash it with, and the expected hex-encoded hash of each
// resulting block, in order.
var blocksTestData = []struct {
data []byte
blocksize int
hash []string
}{
{[]byte(""), 1024, []string{
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}},
{[]byte("contents"), 1024, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"}},
{[]byte("contents"), 9, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"}},
{[]byte("contents"), 8, []string{
"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"}},
{[]byte("contents"), 7, []string{
"ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73",
"043a718774c572bd8a25adbeb1bfcd5c0256ae11cecf9f9c3f925d0e52beaf89"},
},
{[]byte("contents"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4"},
},
{[]byte("conconts"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4"},
},
{[]byte("contenten"), 3, []string{
"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3"},
},
}
// TestBlocks hashes every entry of blocksTestData and verifies the number
// of blocks as well as the offset, size and hash of each block.
//
// The checks iterate over the returned blocks rather than over the input
// bytes, so the single zero-length block produced for empty input is
// verified too.
func TestBlocks(t *testing.T) {
	for n, test := range blocksTestData {
		blocks, err := Blocks(bytes.NewBuffer(test.data), test.blocksize, 0)
		if err != nil {
			t.Fatalf("case %d: %v", n, err)
		}

		if l := len(blocks); l != len(test.hash) {
			t.Fatalf("case %d: Incorrect number of blocks %d != %d", n, l, len(test.hash))
		}

		var off int64
		for i, b := range blocks {
			if b.Offset != off {
				t.Errorf("case %d: Incorrect offset for block %d: %d != %d", n, i, b.Offset, off)
			}

			// Every block is full sized except possibly the last one,
			// which holds whatever remains of the data.
			bs := test.blocksize
			if rem := len(test.data) - int(off); bs > rem {
				bs = rem
			}
			if int(b.Size) != bs {
				t.Errorf("case %d: Incorrect length for block %d: %d != %d", n, i, b.Size, bs)
			}

			if h := fmt.Sprintf("%x", b.Hash); h != test.hash[i] {
				t.Errorf("case %d: Incorrect block hash %q != %q", n, h, test.hash[i])
			}

			off += int64(test.blocksize)
		}
	}
}
// diffTestData lists the inputs for TestDiff. Each entry hashes a and b with
// block size s and expects BlockDiff to report the blocks in d as needed.
// Only the Offset and Size of the expected blocks are compared by the test,
// which is why their hashes are nil.
var diffTestData = []struct {
// a is the source content.
a string
// b is the target content.
b string
// s is the block size used to hash both a and b.
s int
// d is the expected list of needed blocks.
d []protocol.BlockInfo
}{
{"contents", "contents", 1024, []protocol.BlockInfo{}},
{"", "", 1024, []protocol.BlockInfo{}},
{"contents", "contents", 3, []protocol.BlockInfo{}},
{"contents", "cantents", 3, []protocol.BlockInfo{{0, 3, nil}}},
{"contents", "contants", 3, []protocol.BlockInfo{{3, 3, nil}}},
{"contents", "cantants", 3, []protocol.BlockInfo{{0, 3, nil}, {3, 3, nil}}},
{"contents", "", 3, []protocol.BlockInfo{{0, 0, nil}}},
{"", "contents", 3, []protocol.BlockInfo{{0, 3, nil}, {3, 3, nil}, {6, 2, nil}}},
{"con", "contents", 3, []protocol.BlockInfo{{3, 3, nil}, {6, 2, nil}}},
{"contents", "con", 3, nil},
{"contents", "cont", 3, []protocol.BlockInfo{{3, 1, nil}}},
{"cont", "contents", 3, []protocol.BlockInfo{{3, 3, nil}, {6, 2, nil}}},
}
// TestDiff hashes both strings of every diffTestData entry and checks that
// the list of needed blocks reported by BlockDiff has the expected length,
// offsets and sizes.
func TestDiff(t *testing.T) {
	for i, test := range diffTestData {
		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0)
		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0)
		_, d := BlockDiff(a, b)

		if len(d) != len(test.d) {
			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
		}

		for j, want := range test.d {
			got := d[j]
			if got.Offset != want.Offset {
				t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, got.Offset, want.Offset)
			}
			if got.Size != want.Size {
				t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, got.Size, want.Size)
			}
		}
	}
}

17
internal/scanner/debug.go Normal file
View File

@@ -0,0 +1,17 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"os"
"strings"
"github.com/syncthing/syncthing/internal/logger"
)
var (
// debug is true when the STTRACE environment variable contains the
// substring "scanner" or is exactly "all". It is evaluated once, at
// package initialization.
debug = strings.Contains(os.Getenv("STTRACE"), "scanner") || os.Getenv("STTRACE") == "all"
// l is the logger used by this package; it is the logger package's default logger.
l = logger.DefaultLogger
)

6
internal/scanner/doc.go Normal file
View File

@@ -0,0 +1,6 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
// Package scanner implements a file system scanner and hasher.
package scanner

4
internal/scanner/testdata/.stignore vendored Normal file
View File

@@ -0,0 +1,4 @@
#include excludes
bfile
dir1/cfile

1
internal/scanner/testdata/afile vendored Normal file
View File

@@ -0,0 +1 @@
foo

1
internal/scanner/testdata/bfile vendored Normal file
View File

@@ -0,0 +1 @@
bar

1
internal/scanner/testdata/dir1/cfile vendored Normal file
View File

@@ -0,0 +1 @@
baz

1
internal/scanner/testdata/dir1/dfile vendored Normal file
View File

@@ -0,0 +1 @@
quux

1
internal/scanner/testdata/dir2/cfile vendored Normal file
View File

@@ -0,0 +1 @@
baz

1
internal/scanner/testdata/dir2/dfile vendored Normal file
View File

@@ -0,0 +1 @@
quux

1
internal/scanner/testdata/dir3/cfile vendored Normal file
View File

@@ -0,0 +1 @@
baz

1
internal/scanner/testdata/dir3/dfile vendored Normal file
View File

@@ -0,0 +1 @@
quux

2
internal/scanner/testdata/excludes vendored Normal file
View File

@@ -0,0 +1,2 @@
dir2/dfile
#include further-excludes

View File

@@ -0,0 +1 @@
dir3

215
internal/scanner/walk.go Normal file
View File

@@ -0,0 +1,215 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"errors"
"os"
"path/filepath"
"runtime"
"code.google.com/p/go.text/unicode/norm"
"github.com/syncthing/syncthing/internal/ignore"
"github.com/syncthing/syncthing/internal/lamport"
"github.com/syncthing/syncthing/internal/protocol"
)
// Walker holds the configuration for a file system scan started with Walk.
type Walker struct {
// Dir is the base directory for the walk
Dir string
// Limit walking to this path within Dir, or no limit if Sub is blank
Sub string
// BlockSize controls the size of the block used when hashing.
BlockSize int
// List of patterns to ignore
Ignores ignore.Patterns
// If TempNamer is not nil, it is used to ignore temporary files when walking.
TempNamer TempNamer
// If CurrentFiler is not nil, it is queried for the current file before rescanning.
CurrentFiler CurrentFiler
// If IgnorePerms is true, changes to permission bits will not be
// detected. Scanned entries get the protocol.FlagNoPermBits flag set
// together with fixed permission bits (0666 for files, 0777 for
// directories) instead of the bits found on disk.
IgnorePerms bool
}
// TempNamer maps file names to temporary file names and recognizes
// temporary file names.
type TempNamer interface {
// TempName returns a temporary name for the file referred to by path.
TempName(path string) string
// IsTemporary returns true if path refers to the name of a temporary file.
IsTemporary(path string) bool
}
// CurrentFiler provides the previously known state of a file, which the
// walker compares against what it finds on disk to decide whether the file
// needs to be rescanned.
type CurrentFiler interface {
// CurrentFile returns the file as seen at last scan.
CurrentFile(name string) protocol.FileInfo
}
// Walk scans the file system below Dir (restricted to Sub when set) and
// returns a channel on which the entries found are delivered, with regular
// files blockwise hashed. The channel is closed when the scan has finished.
// An error is returned, together with a nil channel, if Dir cannot be
// inspected or is not a directory.
func (w *Walker) Walk() (chan protocol.FileInfo, error) {
	if debug {
		l.Debugln("Walk", w.Dir, w.Sub, w.BlockSize, w.Ignores)
	}

	if err := checkDir(w.Dir); err != nil {
		return nil, err
	}

	// The walker feeds toHash; the hasher workers drain it and deliver
	// their results on hashed.
	toHash := make(chan protocol.FileInfo)
	hashed := make(chan protocol.FileInfo)
	newParallelHasher(w.Dir, w.BlockSize, runtime.NumCPU(), hashed, toHash)

	go func() {
		// Closing the hasher input lets the workers finish, which in
		// turn closes the channel handed to the caller.
		defer close(toHash)
		visit := w.walkAndHashFiles(toHash)
		filepath.Walk(filepath.Join(w.Dir, w.Sub), visit)
	}()

	return hashed, nil
}
// CleanTempFiles removes all files below Dir that match the temporary
// filename pattern, as decided by TempNamer. It does nothing when no
// TempNamer is configured, since no file can then be classified as
// temporary. Cleaning is best effort: a failure to walk the tree is only
// reported when debug logging is enabled.
func (w *Walker) CleanTempFiles() {
	if w.TempNamer == nil {
		// TempNamer is optional; without it there is nothing to clean
		// and the walk callback would dereference a nil interface.
		return
	}
	if err := filepath.Walk(w.Dir, w.cleanTempFile); err != nil && debug {
		l.Debugln("clean temp files:", w.Dir, err)
	}
}
// walkAndHashFiles returns the filepath.WalkFunc used by Walk. For every
// path visited the function:
//
//   - skips entries that could not be read, the base directory itself,
//     temporary files, ignored files (pruning ignored directories) and, on
//     Linux and Windows, names that are not NFC normalized;
//   - skips directories and regular files that the CurrentFiler reports as
//     unchanged;
//   - sends a FileInfo without blocks on fchan for every remaining
//     directory and regular file.
//
// Other file types are silently skipped. The function never aborts the
// walk: the only non-nil value it returns is filepath.SkipDir.
func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFunc {
	return func(p string, info os.FileInfo, err error) error {
		if err != nil {
			if debug {
				l.Debugln("error:", p, info, err)
			}
			return nil
		}

		// Everything below works on the name relative to the base dir.
		rel, err := filepath.Rel(w.Dir, p)
		if err != nil {
			if debug {
				l.Debugln("rel error:", p, err)
			}
			return nil
		}

		if rel == "." {
			return nil
		}

		if w.TempNamer != nil && w.TempNamer.IsTemporary(rel) {
			// A temporary file
			if debug {
				l.Debugln("temporary:", rel)
			}
			return nil
		}

		base := filepath.Base(rel)
		if base == ".stignore" || base == ".stversions" || w.Ignores.Match(rel) {
			// An ignored file
			if debug {
				l.Debugln("ignored:", rel)
			}
			if info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		nfcRequired := runtime.GOOS == "linux" || runtime.GOOS == "windows"
		if nfcRequired && !norm.NFC.IsNormalString(rel) {
			l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rel)
			return nil
		}

		mode := info.Mode()
		switch {
		case mode.IsDir():
			if w.CurrentFiler != nil {
				// A known, non-deleted directory with unchanged
				// permissions needs no update.
				cur := w.CurrentFiler.CurrentFile(rel)
				samePerms := w.IgnorePerms || !protocol.HasPermissionBits(cur.Flags) || PermsEqual(cur.Flags, uint32(mode))
				if !protocol.IsDeleted(cur.Flags) && protocol.IsDirectory(cur.Flags) && samePerms {
					return nil
				}
			}

			var flags uint32 = protocol.FlagDirectory
			if w.IgnorePerms {
				flags |= protocol.FlagNoPermBits | 0777
			} else {
				flags |= uint32(mode & os.ModePerm)
			}

			dir := protocol.FileInfo{
				Name:     rel,
				Version:  lamport.Default.Tick(0),
				Flags:    flags,
				Modified: info.ModTime().Unix(),
			}
			if debug {
				l.Debugln("dir:", dir)
			}
			fchan <- dir

		case mode.IsRegular():
			if w.CurrentFiler != nil {
				// A known, non-deleted file with the same modification
				// time and unchanged permissions is not rescanned.
				cur := w.CurrentFiler.CurrentFile(rel)
				samePerms := w.IgnorePerms || !protocol.HasPermissionBits(cur.Flags) || PermsEqual(cur.Flags, uint32(mode))
				if !protocol.IsDeleted(cur.Flags) && cur.Modified == info.ModTime().Unix() && samePerms {
					return nil
				}
				if debug {
					l.Debugln("rescan:", cur, info.ModTime().Unix(), mode&os.ModePerm)
				}
			}

			var flags = uint32(mode & os.ModePerm)
			if w.IgnorePerms {
				flags = protocol.FlagNoPermBits | 0666
			}

			fchan <- protocol.FileInfo{
				Name:     rel,
				Version:  lamport.Default.Tick(0),
				Flags:    flags,
				Modified: info.ModTime().Unix(),
			}
		}

		return nil
	}
}
// cleanTempFile is a filepath.WalkFunc that removes path when it is a
// regular file (no file type bits set in its mode) whose name the TempNamer
// classifies as temporary.
//
// A walk error passed in by filepath.Walk is returned unchanged. Removal is
// best effort: a failure to remove a file does not stop the walk and is only
// reported when debug logging is enabled. Without a TempNamer nothing is
// considered temporary and nothing is removed.
func (w *Walker) cleanTempFile(path string, info os.FileInfo, err error) error {
	if err != nil {
		return err
	}
	if w.TempNamer == nil {
		// TempNamer is optional on Walker; calling it while nil would panic.
		return nil
	}
	if info.Mode()&os.ModeType == 0 && w.TempNamer.IsTemporary(path) {
		if rmErr := os.Remove(path); rmErr != nil && debug {
			l.Debugln("remove temporary:", path, rmErr)
		}
	}
	return nil
}
// checkDir verifies that dir exists and is a directory. It returns the
// os.Lstat error if dir cannot be inspected, and an error of the form
// "<dir>: not a directory" if it is something else. Because Lstat is used,
// a symbolic link is not followed.
func checkDir(dir string) error {
	info, err := os.Lstat(dir)
	if err != nil {
		return err
	}
	if !info.IsDir() {
		return errors.New(dir + ": not a directory")
	}
	if debug {
		l.Debugln("checkDir", dir, info)
	}
	return nil
}
// PermsEqual reports whether the permission bits of a and b are to be
// considered equal on the current operating system. All bits outside the
// compared mask are ignored.
func PermsEqual(a, b uint32) bool {
	// By default all nine permission bits count.
	var mask uint32 = 0777
	if runtime.GOOS == "windows" {
		// There is only writeable and read only, represented for user,
		// group and other equally. We only compare against user.
		mask = 0600
	}
	return a&mask == b&mask
}

View File

@@ -0,0 +1,172 @@
// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file).
// All rights reserved. Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
package scanner
import (
"bytes"
"fmt"
"path/filepath"
"reflect"
rdebug "runtime/debug"
"sort"
"testing"
"github.com/syncthing/syncthing/internal/ignore"
"github.com/syncthing/syncthing/internal/protocol"
)
// testfile is the simplified view of a scanned entry that the tests compare:
// its name, its size and the hex-encoded hash of its single block (empty
// when the entry has no blocks).
type testfile struct {
name string
size int
hash string
}
// testfileList is a list of testfile entries; it has a String method for
// readable test failure output.
type testfileList []testfile
// testdata is the result TestWalk expects from scanning the testdata
// directory with its .stignore applied, sorted by name.
var testdata = testfileList{
{"afile", 4, "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c"},
{"dir1", 128, ""},
{filepath.Join("dir1", "dfile"), 5, "49ae93732fcf8d63fe1cce759664982dbd5b23161f007dba8561862adc96d063"},
{"dir2", 128, ""},
{filepath.Join("dir2", "cfile"), 4, "bf07a7fbb825fc0aae7bf4a1177b2b31fcf8a3feeaf7092761e18c859ee52a9c"},
{"excludes", 37, "df90b52f0c55dba7a7a940affe482571563b1ac57bd5be4d8a0291e7de928e06"},
{"further-excludes", 5, "7eb0a548094fa6295f7fd9200d69973e5f5ec5c04f2a86d998080ac43ecf89f1"},
}
// NOTE(review): correctIgnores is not referenced by any test visible in this
// file — confirm whether it is still needed.
var correctIgnores = map[string][]string{
".": {".*", "quux"},
}
func init() {
	// This test runs the risk of entering infinite recursion if it fails.
	// Limit the stack size to 10 megs to crash early in that case instead of
	// potentially taking down the box...
	const maxStackBytes = 10 * 1 << 20
	rdebug.SetMaxStack(maxStackBytes)
}
// TestWalkSub scans only the dir2 subdirectory of testdata and verifies that
// ignore patterns loaded from the top level still apply to it.
func TestWalkSub(t *testing.T) {
	ignores, err := ignore.Load("testdata/.stignore")
	if err != nil {
		t.Fatal(err)
	}

	w := Walker{
		Dir:       "testdata",
		Sub:       "dir2",
		BlockSize: 128 * 1024,
		Ignores:   ignores,
	}
	fchan, err := w.Walk()
	// The error must be checked before reading: on failure Walk returns a
	// nil channel, and ranging over a nil channel blocks forever.
	if err != nil {
		t.Fatal(err)
	}

	var files []protocol.FileInfo
	for f := range fchan {
		files = append(files, f)
	}
	// Several hasher workers deliver the results, so the order in which
	// they arrive is not guaranteed; sort by name before indexing.
	sort.Sort(fileList(files))

	// The directory contains two files, where one is ignored from a higher
	// level. We should see only the directory and one of the files.

	if len(files) != 2 {
		t.Fatalf("Incorrect length %d != 2", len(files))
	}
	if files[0].Name != "dir2" {
		t.Errorf("Incorrect file %v != dir2", files[0])
	}
	if files[1].Name != filepath.Join("dir2", "cfile") {
		t.Errorf("Incorrect file %v != dir2/cfile", files[1])
	}
}
// TestWalk scans the whole testdata directory with its .stignore applied and
// compares the sorted result against the expected testdata list.
func TestWalk(t *testing.T) {
	ignores, err := ignore.Load("testdata/.stignore")
	if err != nil {
		t.Fatal(err)
	}
	t.Log(ignores)

	walker := Walker{
		Dir:       "testdata",
		BlockSize: 128 * 1024,
		Ignores:   ignores,
	}

	results, err := walker.Walk()
	if err != nil {
		t.Fatal(err)
	}

	// Drain the channel, then sort by name since the arrival order is
	// not fixed.
	var found fileList
	for entry := range results {
		found = append(found, entry)
	}
	sort.Sort(found)

	files := found.testfiles()
	if !reflect.DeepEqual(files, testdata) {
		t.Errorf("Walk returned unexpected data\nExpected: %v\nActual: %v", testdata, files)
	}
}
// TestWalkError verifies that Walk reports an error both when the base
// directory does not exist and when the base path is not a directory.
func TestWalkError(t *testing.T) {
	w := Walker{
		Dir:       "testdata-missing",
		BlockSize: 128 * 1024,
	}
	_, err := w.Walk()

	if err == nil {
		t.Error("no error from missing directory")
	}

	// Use a regular file that exists in the test fixtures, so that this
	// case exercises the "not a directory" check rather than repeating
	// the missing-path case above.
	w = Walker{
		Dir:       "testdata/afile",
		BlockSize: 128 * 1024,
	}
	_, err = w.Walk()

	if err == nil {
		t.Error("no error from non-directory")
	}
}
// fileList is a list of FileInfo that can be sorted by name using the sort
// package.
type fileList []protocol.FileInfo
// Len returns the number of entries in the list.
func (f fileList) Len() int {
return len(f)
}
// Less reports whether the name of entry a sorts before the name of entry b.
func (f fileList) Less(a, b int) bool {
return f[a].Name < f[b].Name
}
// Swap exchanges the entries at positions a and b.
func (f fileList) Swap(a, b int) {
f[a], f[b] = f[b], f[a]
}
// testfiles converts the list into the simplified testfileList form used
// for comparisons: name, size and the hex-encoded hash of the only block,
// if there is one. It panics when an entry has more than one block.
func (fl fileList) testfiles() testfileList {
	converted := make(testfileList, len(fl))
	for idx := range fl {
		entry := fl[idx]
		blockCount := len(entry.Blocks)
		if blockCount > 1 {
			panic("simple test case stuff only supports a single block per file")
		}

		tf := testfile{name: entry.Name, size: int(entry.Size())}
		if blockCount == 1 {
			tf.hash = fmt.Sprintf("%x", entry.Blocks[0].Hash)
		}
		converted[idx] = tf
	}
	return converted
}
// String renders the list in braces with one line per entry giving its
// name, size in bytes and hash.
func (tfl testfileList) String() string {
	out := bytes.NewBufferString("{\n")
	for i := range tfl {
		entry := tfl[i]
		fmt.Fprintf(out, " %s (%d bytes): %s\n", entry.name, entry.size, entry.hash)
	}
	out.WriteString("}")
	return out.String()
}