Automatically fix file name normalization errors (fixes #430)

This commit is contained in:
Jakob Borg
2015-03-18 23:54:50 +01:00
parent 75523556e8
commit 8311162be3
4 changed files with 168 additions and 11 deletions

View File

@@ -13,6 +13,7 @@ import (
"runtime"
"strings"
"time"
"unicode/utf8"
"github.com/syncthing/protocol"
"github.com/syncthing/syncthing/internal/ignore"
@@ -55,6 +56,9 @@ type Walker struct {
// detected. Scanned files will get zero permission bits and the
// NoPermissionBits flag set.
IgnorePerms bool
// When AutoNormalize is set, file names that are valid UTF-8 but in an
// incorrect normalization form will be corrected.
AutoNormalize bool
// Number of routines to use for hashing
Hashers int
}
@@ -149,11 +153,63 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
return nil
}
if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
if !utf8.ValidString(rn) {
l.Warnf("File name %q is not in UTF8 encoding; skipping.", rn)
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
var normalizedRn string
if runtime.GOOS == "darwin" {
// Mac OS X file names should always be NFD normalized.
normalizedRn = norm.NFD.String(rn)
} else {
// Every other OS in the known universe uses NFC or just plain
// doesn't bother to define an encoding. In our case *we* do care,
// so we enforce NFC regardless.
normalizedRn = norm.NFC.String(rn)
}
if rn != normalizedRn {
// The file name was not normalized.
if !w.AutoNormalize {
// We're not authorized to do anything about it, so complain and skip.
l.Warnf("File name %q is not in the correct UTF8 normalization form; skipping.", rn)
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// We will attempt to normalize it.
normalizedPath := filepath.Join(w.Dir, normalizedRn)
if _, err := os.Lstat(normalizedPath); os.IsNotExist(err) {
// Nothing exists with the normalized filename. Good.
if err = os.Rename(p, normalizedPath); err != nil {
l.Infof(`Error normalizing UTF8 encoding of file "%s": %v`, rn, err)
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
l.Infof(`Normalized UTF8 encoding of file name "%s".`, rn)
} else {
// There is something already in the way at the normalized
// file name.
l.Infof(`File "%s" has UTF8 encoding conflict with another file; ignoring.`, rn)
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
rn = normalizedRn
}
// Index-wise, symlinks are always files, regardless of what the target
// is, because symlinks carry their target path as their content.
if info.Mode()&os.ModeSymlink == os.ModeSymlink {