Failsafe when splitting surrogate pairs - fixes #248
This commit is contained in:
parent
114f28f48e
commit
e9cb07da55
@ -51,6 +51,17 @@ export class ContentString {
|
||||
splice (offset) {
|
||||
const right = new ContentString(this.str.slice(offset))
|
||||
this.str = this.str.slice(0, offset)
|
||||
|
||||
// Prevent encoding invalid documents because of splitting of surrogate pairs: https://github.com/yjs/yjs/issues/248
|
||||
const firstCharCode = this.str.charCodeAt(offset - 1)
|
||||
if (firstCharCode >= 0xD800 && firstCharCode <= 0xDBFF) {
|
||||
// Last character of the left split is the start of a surrogate utf16/ucs2 pair.
|
||||
// We don't support splitting of surrogate pairs because this may lead to invalid documents.
|
||||
// Replace the invalid character with a unicode replacement character (<28> / U+FFFD)
|
||||
this.str = this.str.slice(0, offset - 1) + '<27>'
|
||||
// replace right as well
|
||||
right.str = '<27>' + right.str.slice(1)
|
||||
}
|
||||
return right
|
||||
}
|
||||
|
||||
|
@ -249,6 +249,8 @@ export const testAppendChars = tc => {
|
||||
t.assert(text0.length === N)
|
||||
}
|
||||
|
||||
// Shared size for the large-document tests: used as `N` in testBestCase and as `itemsToInsert` in testLargeFragmentedDocument.
const largeDocumentSize = 100000
|
||||
|
||||
const id = Y.createID(0, 0)
|
||||
const c = new Y.ContentString('a')
|
||||
|
||||
@ -256,7 +258,7 @@ const c = new Y.ContentString('a')
|
||||
* @param {t.TestCase} tc
|
||||
*/
|
||||
export const testBestCase = tc => {
|
||||
const N = 2000000
|
||||
const N = largeDocumentSize
|
||||
const items = new Array(N)
|
||||
t.measureTime('time to create two million items in the best case', () => {
|
||||
const parent = /** @type {any} */ ({})
|
||||
@ -293,7 +295,7 @@ const tryGc = () => {
|
||||
* @param {t.TestCase} tc
|
||||
*/
|
||||
export const testLargeFragmentedDocument = tc => {
|
||||
const itemsToInsert = 1000000
|
||||
const itemsToInsert = largeDocumentSize
|
||||
let update = /** @type {any} */ (null)
|
||||
;(() => {
|
||||
const doc1 = new Y.Doc()
|
||||
@ -321,6 +323,40 @@ export const testLargeFragmentedDocument = tc => {
|
||||
})()
|
||||
}
|
||||
|
||||
/**
 * Regression test: operations that cut a surrogate pair in half must not
 * produce an invalid encoded document.
 *
 * https://github.com/yjs/yjs/issues/248
 *
 * @param {t.TestCase} tc
 */
export const testSplitSurrogateCharacter = tc => {
  /**
   * Runs one scenario: two users are created, the second one is disconnected,
   * `initialText` is inserted at position 0 of the first user's text, then
   * `splitSurrogate` is applied to that text and finally all users are compared.
   *
   * @param {string} initialText
   * @param {function(any):void} splitSurrogate
   */
  const runScenario = (initialText, splitSurrogate) => {
    const { users, text0 } = init(tc, { users: 2 })
    users[1].disconnect() // disconnecting forces the user to encode the split surrogate
    text0.insert(0, initialText) // insert surrogate character
    splitSurrogate(text0)
    compare(users)
  }

  // split surrogate, which should not lead to an encoding error
  runScenario('👾', text => {
    text.insert(1, 'hi!')
  })

  // partially delete surrogate
  runScenario('👾👾', text => {
    text.delete(1, 2)
  })

  // formatting will also split surrogates
  runScenario('👾👾', text => {
    text.format(1, 2, { bold: true })
  })
}
|
||||
|
||||
// RANDOM TESTS
|
||||
|
||||
let charCounter = 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user