From 39167e6e2aae3987c0a7390818346071694875c9 Mon Sep 17 00:00:00 2001 From: Kevin Jahns Date: Sat, 22 Apr 2023 18:38:12 +0200 Subject: [PATCH] Implement function that obfuscates a ydoc and scrambles its content --- README.md | 24 ++++++ package-lock.json | 8 +- package.json | 2 +- src/index.js | 2 + src/utils/updates.js | 168 +++++++++++++++++++++++++++++++++++++---- tests/updates.tests.js | 53 ++++++++++++- 6 files changed, 238 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 916c0c51..08b331fc 100644 --- a/README.md +++ b/README.md @@ -753,6 +753,30 @@ currentState1 = Y.mergeUpdates([currentState1, diff2]) currentState1 = Y.mergeUpdates([currentState1, diff1]) ``` +#### Obfuscating Updates + +If one of your users runs into a weird bug (e.g. the rich-text editor throws +error messages), then you don't have to request the full document from your +user. Instead, they can obfuscate the document (i.e. replace the content with +meaningless generated content) before sending it to you. Note that someone might +still deduce the type of content by looking at the general structure of the +document. But this is much better than requesting the original document. + +Obfuscated updates contain all the CRDT-related data that is required for +merging. So it is safe to merge obfuscated updates. + +```javascript +const ydoc = new Y.Doc() +// perform some changes.. +ydoc.getText().insert(0, 'hello world') +const update = Y.encodeStateAsUpdate(ydoc) +// the below update contains scrambled data +const obfuscatedUpdate = Y.obfuscateUpdate(update) +const ydoc2 = new Y.Doc() +Y.applyUpdate(ydoc2, obfuscatedUpdate) +ydoc2.getText().toString() // => "00000000000" +``` + #### Using V2 update format Yjs implements two update formats. By default you are using the V1 update format. 
diff --git a/package-lock.json b/package-lock.json index 38c077df..7539f5c4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "13.5.53", "license": "MIT", "dependencies": { - "lib0": "^0.2.72" + "lib0": "^0.2.74" }, "devDependencies": { "@rollup/plugin-commonjs": "^24.0.1", @@ -2481,9 +2481,9 @@ } }, "node_modules/lib0": { - "version": "0.2.73", - "resolved": "https://registry.npmjs.org/lib0/-/lib0-0.2.73.tgz", - "integrity": "sha512-aJJIElCLWnHMcYZPtsM07QoSfHwpxCy4VUzBYGXFYEmh/h2QS5uZNbCCfL0CqnkOE30b7Tp9DVfjXag+3qzZjQ==", + "version": "0.2.74", + "resolved": "https://registry.npmjs.org/lib0/-/lib0-0.2.74.tgz", + "integrity": "sha512-roj9i46/JwG5ik5KNTkxP2IytlnrssAkD/OhlAVtE+GqectrdkfR+pttszVLrOzMDeXNs1MPt6yo66MUolWSiA==", "dependencies": { "isomorphic.js": "^0.2.4" }, diff --git a/package.json b/package.json index 0f862677..27588fa4 100644 --- a/package.json +++ b/package.json @@ -75,7 +75,7 @@ }, "homepage": "https://docs.yjs.dev", "dependencies": { - "lib0": "^0.2.72" + "lib0": "^0.2.74" }, "devDependencies": { "@rollup/plugin-commonjs": "^24.0.1", diff --git a/src/index.js b/src/index.js index 6624f8cb..d23c37f5 100644 --- a/src/index.js +++ b/src/index.js @@ -90,6 +90,8 @@ export { diffUpdateV2, convertUpdateFormatV1ToV2, convertUpdateFormatV2ToV1, + obfuscateUpdate, + obfuscateUpdateV2, UpdateEncoderV1 } from './internals.js' diff --git a/src/utils/updates.js b/src/utils/updates.js index 950f1413..c64ce355 100644 --- a/src/utils/updates.js +++ b/src/utils/updates.js @@ -2,19 +2,40 @@ import * as binary from 'lib0/binary' import * as decoding from 'lib0/decoding' import * as encoding from 'lib0/encoding' +import * as error from 'lib0/error' +import * as f from 'lib0/function' import * as logging from 'lib0/logging' +import * as map from 'lib0/map' import * as math from 'lib0/math' +import * as string from 'lib0/string' + import { + ContentAny, + ContentBinary, + ContentDeleted, + ContentDoc, + ContentEmbed, + ContentFormat, + 
ContentJSON, + ContentString, + ContentType, createID, - readItemContent, - readDeleteSet, - writeDeleteSet, - Skip, - mergeDeleteSets, + decodeStateVector, DSEncoderV1, DSEncoderV2, - decodeStateVector, - Item, GC, UpdateDecoderV1, UpdateDecoderV2, UpdateEncoderV1, UpdateEncoderV2 // eslint-disable-line + GC, + Item, + mergeDeleteSets, + readDeleteSet, + readItemContent, + Skip, + UpdateDecoderV1, + UpdateDecoderV2, + UpdateEncoderV1, + UpdateEncoderV2, + writeDeleteSet, + YXmlElement, + YXmlHook } from '../internals.js' /** @@ -552,17 +573,17 @@ const finishLazyStructWriting = (lazyWriter) => { /** * @param {Uint8Array} update + * @param {function(Item|GC|Skip):Item|GC|Skip} blockTransformer * @param {typeof UpdateDecoderV2 | typeof UpdateDecoderV1} YDecoder * @param {typeof UpdateEncoderV2 | typeof UpdateEncoderV1 } YEncoder */ -export const convertUpdateFormat = (update, YDecoder, YEncoder) => { +export const convertUpdateFormat = (update, blockTransformer, YDecoder, YEncoder) => { const updateDecoder = new YDecoder(decoding.createDecoder(update)) const lazyDecoder = new LazyStructReader(updateDecoder, false) const updateEncoder = new YEncoder() const lazyWriter = new LazyStructWriter(updateEncoder) - for (let curr = lazyDecoder.curr; curr !== null; curr = lazyDecoder.next()) { - writeStructToLazyStructWriter(lazyWriter, curr, 0) + writeStructToLazyStructWriter(lazyWriter, blockTransformer(curr), 0) } finishLazyStructWriting(lazyWriter) const ds = readDeleteSet(updateDecoder) @@ -571,11 +592,132 @@ export const convertUpdateFormat = (update, YDecoder, YEncoder) => { } /** - * @param {Uint8Array} update + * @typedef {Object} ObfuscatorOptions + * @property {boolean} [ObfuscatorOptions.formatting=true] + * @property {boolean} [ObfuscatorOptions.subdocs=true] + * @property {boolean} [ObfuscatorOptions.yxml=true] Whether to obfuscate nodeName / hookName */ -export const convertUpdateFormatV1ToV2 = update => convertUpdateFormat(update, UpdateDecoderV1, 
UpdateEncoderV2) + +/** + * @param {ObfuscatorOptions} obfuscator + */ +const createObfuscator = ({ formatting = true, subdocs = true, yxml = true } = {}) => { + let i = 0 + const mapKeyCache = map.create() + const nodeNameCache = map.create() + const formattingKeyCache = map.create() + const formattingValueCache = map.create() + formattingValueCache.set(null, null) // end of a formatting range should always be the end of a formatting range + /** + * @param {Item|GC|Skip} block + * @return {Item|GC|Skip} + */ + return block => { + switch (block.constructor) { + case GC: + case Skip: + return block + case Item: { + const item = /** @type {Item} */ (block) + const content = item.content + switch (content.constructor) { + case ContentDeleted: + break + case ContentType: { + if (yxml) { + const type = /** @type {ContentType} */ (content).type + if (type instanceof YXmlElement) { + type.nodeName = map.setIfUndefined(nodeNameCache, type.nodeName, () => 'node-' + i) + } + if (type instanceof YXmlHook) { + type.hookName = map.setIfUndefined(nodeNameCache, type.hookName, () => 'hook-' + i) + } + } + break + } + case ContentAny: { + const c = /** @type {ContentAny} */ (content) + c.arr = c.arr.map(() => i) + break + } + case ContentBinary: { + const c = /** @type {ContentBinary} */ (content) + c.content = new Uint8Array([i]) + break + } + case ContentDoc: { + const c = /** @type {ContentDoc} */ (content) + if (subdocs) { + c.opts = {} + c.doc.guid = i + '' + } + break + } + case ContentEmbed: { + const c = /** @type {ContentEmbed} */ (content) + c.embed = {} + break + } + case ContentFormat: { + const c = /** @type {ContentFormat} */ (content) + if (formatting) { + c.key = map.setIfUndefined(formattingKeyCache, c.key, () => i + '') + c.value = map.setIfUndefined(formattingValueCache, c.value, () => ({ i })) + } + break + } + case ContentJSON: { + const c = /** @type {ContentJSON} */ (content) + c.arr = c.arr.map(() => i) + break + } + case ContentString: { + const c = /** 
@type {ContentString} */ (content) + c.str = string.repeat((i % 10) + '', c.str.length) + break + } + default: + // unknown content type + error.unexpectedCase() + } + if (item.parentSub) { + item.parentSub = map.setIfUndefined(mapKeyCache, item.parentSub, () => i + '') + } + i++ + return block + } + default: + // unknown block-type + error.unexpectedCase() + } + } +} + +/** + * This function obfuscates the content of a Yjs update. This is useful to share + * buggy Yjs documents while significantly limiting the possibility that a + * developer can spy on the user. Note that it might still be possible to deduce + * some information by analyzing the "structure" of the document or by analyzing + * the typing behavior using the CRDT-related metadata that is still kept fully + * intact. + * + * @param {Uint8Array} update + * @param {ObfuscatorOptions} [opts] + */ +export const obfuscateUpdate = (update, opts) => convertUpdateFormat(update, createObfuscator(opts), UpdateDecoderV1, UpdateEncoderV1) + +/** + * @param {Uint8Array} update + * @param {ObfuscatorOptions} [opts] + */ +export const obfuscateUpdateV2 = (update, opts) => convertUpdateFormat(update, createObfuscator(opts), UpdateDecoderV2, UpdateEncoderV2) /** * @param {Uint8Array} update */ -export const convertUpdateFormatV2ToV1 = update => convertUpdateFormat(update, UpdateDecoderV2, UpdateEncoderV1) +export const convertUpdateFormatV1ToV2 = update => convertUpdateFormat(update, f.id, UpdateDecoderV1, UpdateEncoderV2) + +/** + * @param {Uint8Array} update + */ +export const convertUpdateFormatV2ToV1 = update => convertUpdateFormat(update, f.id, UpdateDecoderV2, UpdateEncoderV1) diff --git a/tests/updates.tests.js b/tests/updates.tests.js index dce09e71..4ba3bab6 100644 --- a/tests/updates.tests.js +++ b/tests/updates.tests.js @@ -4,6 +4,7 @@ import * as Y from '../src/index.js' import { readClientsStructRefs, readDeleteSet, UpdateDecoderV2, UpdateEncoderV2, writeDeleteSet } from '../src/internals.js' import * as 
encoding from 'lib0/encoding' import * as decoding from 'lib0/decoding' +import * as object from 'lib0/object' /** * @typedef {Object} Enc @@ -138,7 +139,6 @@ export const testKeyEncoding = tc => { */ const checkUpdateCases = (ydoc, updates, enc, hasDeletes) => { const cases = [] - // Case 1: Simple case, simply merge everything cases.push(enc.mergeUpdates(updates)) @@ -304,3 +304,54 @@ export const testMergePendingUpdates = tc => { const yText5 = yDoc5.getText('textBlock') t.compareStrings(yText5.toString(), 'nenor') } + +/** + * @param {t.TestCase} tc + */ +export const testObfuscateUpdates = tc => { + const ydoc = new Y.Doc() + const ytext = ydoc.getText('text') + const ymap = ydoc.getMap('map') + const yarray = ydoc.getArray('array') + // test ytext + ytext.applyDelta([{ insert: 'text', attributes: { bold: true } }, { insert: { href: 'supersecreturl' } }]) + // test ymap + ymap.set('key', 'secret1') + ymap.set('key', 'secret2') + // test yarray with subtype & subdoc + const subtype = new Y.XmlElement('secretnodename') + const subdoc = new Y.Doc({ guid: 'secret' }) + subtype.setAttribute('attr', 'val') + yarray.insert(0, ['teststring', 42, subtype, subdoc]) + // obfuscate the content and put it into a new document + const obfuscatedUpdate = Y.obfuscateUpdate(Y.encodeStateAsUpdate(ydoc)) + const odoc = new Y.Doc() + Y.applyUpdate(odoc, obfuscatedUpdate) + const otext = odoc.getText('text') + const omap = odoc.getMap('map') + const oarray = odoc.getArray('array') + // test ytext + const delta = otext.toDelta() + t.assert(delta.length === 2) + t.assert(delta[0].insert !== 'text' && delta[0].insert.length === 4) + t.assert(object.length(delta[0].attributes) === 1) + t.assert(!object.hasProperty(delta[0].attributes, 'bold')) + t.assert(object.length(delta[1]) === 1) + t.assert(object.hasProperty(delta[1], 'insert')) + // test ymap + t.assert(omap.size === 1) + t.assert(!omap.has('key')) + // test yarray with subtype & subdoc + const result = oarray.toArray() + 
t.assert(result.length === 4) + t.assert(result[0] !== 'teststring') + t.assert(result[1] !== 42) + const osubtype = /** @type {Y.XmlElement} */ (result[2]) + const osubdoc = result[3] + // test subtype + t.assert(osubtype.nodeName !== subtype.nodeName) + t.assert(object.length(osubtype.getAttributes()) === 1) + t.assert(osubtype.getAttribute('attr') === undefined) + // test subdoc + t.assert(osubdoc.guid !== subdoc.guid) +}