diff --git a/cmd/syncthing/model.go b/cmd/syncthing/model.go index acab979c..db6dd29d 100644 --- a/cmd/syncthing/model.go +++ b/cmd/syncthing/model.go @@ -839,8 +839,8 @@ func (m *Model) recomputeNeedForFile(gf scanner.File, toAdd []addOrder, toDelete } else { local, remote := scanner.BlockDiff(lf.Blocks, gf.Blocks) fm := fileMonitor{ - name: gf.Name, - path: path.Clean(path.Join(m.dir, gf.Name)), + name: FSNormalize(gf.Name), + path: FSNormalize(path.Clean(path.Join(m.dir, gf.Name))), global: gf, model: m, localBlocks: local, @@ -875,7 +875,7 @@ func (m *Model) deleteLoop() { if debugPull { dlog.Println("delete", file.Name) } - path := path.Clean(path.Join(m.dir, file.Name)) + path := FSNormalize(path.Clean(path.Join(m.dir, file.Name))) err := os.Remove(path) if err != nil { warnf("%s: %v", file.Name, err) diff --git a/cmd/syncthing/normalize.go b/cmd/syncthing/normalize.go new file mode 100644 index 00000000..ccedaccd --- /dev/null +++ b/cmd/syncthing/normalize.go @@ -0,0 +1,11 @@ +//+build !darwin + +package main + +import "code.google.com/p/go.text/unicode/norm" + +// FSNormalize returns the string with the required unicode normalization for +// the host operating system. +func FSNormalize(s string) string { + return norm.NFC.String(s) +} diff --git a/cmd/syncthing/normalize_darwin.go b/cmd/syncthing/normalize_darwin.go new file mode 100644 index 00000000..77ba8e5c --- /dev/null +++ b/cmd/syncthing/normalize_darwin.go @@ -0,0 +1,11 @@ +//+build darwin + +package main + +import "code.google.com/p/go.text/unicode/norm" + +// FSNormalize returns the string with the required unicode normalization for +// the host operating system. +func FSNormalize(s string) string { + return norm.NFD.String(s) +} diff --git a/protocol/PROTOCOL.md b/protocol/PROTOCOL.md index eb1383c2..1a5cb300 100644 --- a/protocol/PROTOCOL.md +++ b/protocol/PROTOCOL.md @@ -163,8 +163,9 @@ response to the Index message. The Repository field identifies the repository that the index message pertains to. For single repository implementations an empty repository ID is acceptable, or the word "default". The Name is the file name path -relative to the repository root. The combination of Repository and Name -uniquely identifies each file in a cluster. +relative to the repository root. The Name is always in UTF-8 NFC regardless +of operating system or file system specific conventions. The combination of +Repository and Name uniquely identifies each file in a cluster. The Version field is a counter that is initially zero for each file. It is incremented each time a change is detected. The combination of diff --git a/scanner/walk.go b/scanner/walk.go index e32cd083..1bd4124a 100644 --- a/scanner/walk.go +++ b/scanner/walk.go @@ -9,6 +9,8 @@ import ( "path/filepath" "strings" "time" + + "code.google.com/p/go.text/unicode/norm" ) type Walker struct { @@ -136,6 +138,7 @@ func (w *Walker) loadIgnoreFiles(dir string, ign map[string][]string) filepath.W func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath.WalkFunc { return func(p string, info os.FileInfo, err error) error { + if err != nil { if debug { dlog.Println("error:", p, info, err) @@ -151,6 +154,9 @@ func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath return nil } + // Internally, we always use unicode normalization form C + rn = norm.NFC.String(rn) + if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) { if debug { dlog.Println("temporary:", rn)