lib/fs: Add case insensitivity to MtimeFS

This is step one of a hundred and fifty on the path to case insensitivity. It brings in the basic case folding mechanism and adds it to the mtimefs, as this is something outside the fileset that touches stuff in the database based on name. No effort is made to convert or handle existing entries when the insensitivity is changed; I don't think we need it... Useless by itself, but it includes tests and will reduce the review load along the way. GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4521
2017-11-17 12:10:16 +00:00
parent 7ebf58f1bc
commit ee5d0dd43f
4 changed files with 141 additions and 5 deletions
@@ -0,0 +1,48 @@
+// Copyright (C) 2017 The Syncthing Authors.
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+package fs
+
+import "testing"
+
+func TestUnicodeLowercase(t *testing.T) {
+	cases := [][2]string{ // each case is {input, expected result}
+		{"", ""},
+		{"hej", "hej"},
+		{"HeJ!@#", "hej!@#"},
+		// Western European diacritics are trivial.
+		{"ÜBERRÄKSMÖRGÅS", "überräksmörgås"},
+		// Cyrillic seems regular as well.
+		{"Привет", "привет"},
+		// Greek has more than one lower case sigma depending on position;
+		// we always expect the same one (σ), even where the input has a final ς.
+		{"Ὀδυσσεύς", "ὀδυσσεύσ"},
+		{"ὈΔΥΣΣΕΎΣ", "ὀδυσσεύσ"},
+		// German ß doesn't really have an upper case variant, and we
+		// shouldn't mess things up when lower casing it either. We don't
+		// attempt to make ß equivalent to "ss".
+		{"Reichwaldstraße", "reichwaldstraße"},
+		// Turkish has its own rules for dotted and dotless I. As in the
+		// Greek example we pick just the one canonicalized "i", although
+		// you can argue with this... From what I understand most operating
+		// systems don't get this right anyway.
+		{"İI", "ii"},
+		// Arabic doesn't do case folding.
+		{"العَرَبِيَّة", "العَرَبِيَّة"},
+		// Neither does Hebrew.
+		{"עברית", "עברית"},
+		// Nor Chinese, in any variant.
+		{"汉语/漢語 or 中文", "汉语/漢語 or 中文"},
+		// Neither does katakana, as far as I can tell.
+		{"チャーハン", "チャーハン"},
+	}
+	for _, tc := range cases {
+		res := UnicodeLowercase(tc[0])
+		if res != tc[1] { // report every mismatch; t.Errorf does not stop the loop
+			t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1])
+		}
+	}
+}