(backend) Import of documents

We can now import documents in the .docx and .md formats.
To do so, we added a new "docspec" container, which
uses the docspec service to convert
these formats to the BlockNote format.

More here: #1567 #1569.
This commit is contained in:
Stephan Meijer
2025-11-15 16:29:43 +01:00
committed by Anthony LC
parent 61dbda0bf6
commit b547657efd
12 changed files with 305 additions and 109 deletions

View File

@@ -69,7 +69,7 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', 'wrong-api-key')
.set('authorization', `Bearer wrong-api-key`)
.set('content-type', 'application/json');
expect(response.status).toBe(401);
@@ -99,7 +99,7 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/json');
expect(response.status).toBe(400);
@@ -114,7 +114,7 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/json')
.send('');
@@ -129,9 +129,10 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'image/png')
.send('randomdata');
expect(response.status).toBe(415);
expect(response.body).toStrictEqual({ error: 'Unsupported Content-Type' });
});
@@ -141,38 +142,73 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'text/markdown')
.set('accept', 'image/png')
.send('# Header');
expect(response.status).toBe(406);
expect(response.body).toStrictEqual({ error: 'Unsupported format' });
});
test.each([[apiKey], [`Bearer ${apiKey}`]])(
'POST /api/convert with correct content with Authorization: %s',
async (authHeader) => {
const app = initApp();
test('POST /api/convert BlockNote to Markdown', async () => {
const app = initApp();
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.blocknote+json')
.set('accept', 'text/markdown')
.send(expectedBlocks);
const response = await request(app)
.post('/api/convert')
.set('Origin', origin)
.set('Authorization', authHeader)
.set('content-type', 'text/markdown')
.set('accept', 'application/vnd.yjs.doc')
.send(expectedMarkdown);
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe(
'text/markdown; charset=utf-8',
);
expect(typeof response.text).toBe('string');
expect(response.text.trim()).toBe(expectedMarkdown);
});
expect(response.status).toBe(200);
expect(response.body).toBeInstanceOf(Buffer);
test('POST /api/convert BlockNote to Yjs', async () => {
const app = initApp();
const editor = ServerBlockNoteEditor.create();
const blocks = await editor.tryParseMarkdownToBlocks(expectedMarkdown);
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.blocknote+json')
.set('accept', 'application/vnd.yjs.doc')
.send(blocks)
.responseType('blob');
const editor = ServerBlockNoteEditor.create();
const doc = new Y.Doc();
Y.applyUpdate(doc, response.body);
const blocks = editor.yDocToBlocks(doc, 'document-store');
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe('application/vnd.yjs.doc');
expect(blocks).toStrictEqual(expectedBlocks);
},
);
// Decode the Yjs response and verify it contains the correct blocks
const responseBuffer = Buffer.from(response.body as Buffer);
const ydoc = new Y.Doc();
Y.applyUpdate(ydoc, responseBuffer);
const decodedBlocks = editor.yDocToBlocks(ydoc, 'document-store');
expect(decodedBlocks).toStrictEqual(expectedBlocks);
});
test('POST /api/convert BlockNote to HTML', async () => {
const app = initApp();
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.blocknote+json')
.set('accept', 'text/html')
.send(expectedBlocks);
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe('text/html; charset=utf-8');
expect(typeof response.text).toBe('string');
expect(response.text).toBe(expectedHTML);
});
test('POST /api/convert Yjs to HTML', async () => {
const app = initApp();
@@ -183,10 +219,11 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.yjs.doc')
.set('accept', 'text/html')
.send(Buffer.from(yjsUpdate));
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe('text/html; charset=utf-8');
expect(typeof response.text).toBe('string');
@@ -202,10 +239,11 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.yjs.doc')
.set('accept', 'text/markdown')
.send(Buffer.from(yjsUpdate));
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe(
'text/markdown; charset=utf-8',
@@ -223,15 +261,16 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.yjs.doc')
.set('accept', 'application/json')
.send(Buffer.from(yjsUpdate));
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe(
'application/json; charset=utf-8',
);
expect(Array.isArray(response.body)).toBe(true);
expect(response.body).toBeInstanceOf(Array);
expect(response.body).toStrictEqual(expectedBlocks);
});
@@ -240,15 +279,16 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'text/markdown')
.set('accept', 'application/json')
.send(expectedMarkdown);
expect(response.status).toBe(200);
expect(response.header['content-type']).toBe(
'application/json; charset=utf-8',
);
expect(Array.isArray(response.body)).toBe(true);
expect(response.body).toBeInstanceOf(Array);
expect(response.body).toStrictEqual(expectedBlocks);
});
@@ -257,11 +297,12 @@ describe('Server Tests', () => {
const response = await request(app)
.post('/api/convert')
.set('origin', origin)
.set('authorization', apiKey)
.set('authorization', `Bearer ${apiKey}`)
.set('content-type', 'application/vnd.yjs.doc')
.set('accept', 'application/json')
.send(Buffer.from('notvalidyjs'));
expect(response.status).toBe(400);
expect(response.body).toStrictEqual({ error: 'Invalid Yjs content' });
expect(response.body).toStrictEqual({ error: 'Invalid content' });
});
});

View File

@@ -14,27 +14,115 @@ interface ErrorResponse {
error: string;
}
/** Union of every body shape the conversion handler can send: binary data, text, a JSON-serializable object, or an error payload. */
type ConversionResponseBody = Uint8Array | string | object | ErrorResponse;
/**
 * Strategy that turns a raw request body into BlockNote blocks.
 */
interface InputReader {
/** Content types this reader is selected for. */
supportedContentTypes: string[];
/** Parses the raw body bytes into blocks. */
read(data: Buffer): Promise<PartialBlock[]>;
}
/**
 * Strategy that serializes BlockNote blocks into a response body.
 */
interface OutputWriter {
/** Content types this writer is selected for. */
supportedContentTypes: string[];
/** Serializes the blocks into the body to be sent to the client. */
write(blocks: PartialBlock[]): Promise<ConversionResponseBody>;
}
/**
 * Module-level server-side BlockNote editor, typed with the default block,
 * inline-content and style schemas. The readers and writers below use it for
 * every conversion.
 */
const editor = ServerBlockNoteEditor.create<
DefaultBlockSchema,
DefaultInlineContentSchema,
DefaultStyleSchema
>();
/**
 * Media types referenced by the conversion readers and writers.
 */
const ContentTypes = {
// Alternative Markdown media type, handled the same way as text/markdown.
XMarkdown: 'text/x-markdown',
Markdown: 'text/markdown',
// Binary Yjs document update.
YJS: 'application/vnd.yjs.doc',
// Read as Markdown for backward compatibility.
FormUrlEncoded: 'application/x-www-form-urlencoded',
// Generic binary type, handled the same way as the Yjs type.
OctetStream: 'application/octet-stream',
HTML: 'text/html',
// BlockNote blocks serialized as JSON.
BlockNote: 'application/vnd.blocknote+json',
JSON: 'application/json',
} as const;
/**
 * Builds a Yjs document from BlockNote blocks, stored under the
 * `document-store` fragment.
 *
 * @param blocks - Blocks to place in the Yjs document.
 * @returns The Yjs document produced by the editor.
 */
const createYDocument = (blocks: PartialBlock[]) => {
  const fragmentName = 'document-store';
  return editor.blocksToYDoc(blocks, fragmentName);
};
/**
 * Input readers, selected by the request's content type. Each one converts the
 * raw request body into BlockNote blocks. A reader signals unusable input by
 * rejecting.
 */
const readers: InputReader[] = [
  {
    // application/x-www-form-urlencoded is interpreted as Markdown for backward compatibility
    supportedContentTypes: [
      ContentTypes.Markdown,
      ContentTypes.XMarkdown,
      ContentTypes.FormUrlEncoded,
    ],
    read: (data) => editor.tryParseMarkdownToBlocks(data.toString()),
  },
  {
    supportedContentTypes: [ContentTypes.YJS, ContentTypes.OctetStream],
    read: async (data) => {
      // Apply the binary update to a fresh document, then read the blocks
      // back from the `document-store` fragment.
      const ydoc = new Y.Doc();
      Y.applyUpdate(ydoc, data);
      return editor.yDocToBlocks(ydoc, 'document-store') as PartialBlock[];
    },
  },
  {
    supportedContentTypes: [ContentTypes.BlockNote],
    read: async (data) => {
      // JSON.parse accepts any JSON value; only an array can be a block list.
      // Rejecting here lets the caller treat it as invalid input instead of
      // failing later while writing the output.
      const parsed: unknown = JSON.parse(data.toString());
      if (!Array.isArray(parsed)) {
        throw new Error('BlockNote content must be a JSON array of blocks');
      }
      return parsed as PartialBlock[];
    },
  },
];
/**
 * Output writers, selected by the requested response type. Each one
 * serializes BlockNote blocks into the body for that format.
 */
const writers: OutputWriter[] = [
  {
    // Blocks are passed through unchanged.
    supportedContentTypes: [ContentTypes.BlockNote, ContentTypes.JSON],
    write: async (content) => content,
  },
  {
    // Encode the whole Yjs document state as a single binary update.
    supportedContentTypes: [ContentTypes.YJS, ContentTypes.OctetStream],
    write: async (content) => {
      const yDocument = createYDocument(content);
      return Y.encodeStateAsUpdate(yDocument);
    },
  },
  {
    supportedContentTypes: [ContentTypes.Markdown, ContentTypes.XMarkdown],
    write: (content) => {
      return editor.blocksToMarkdownLossy(content);
    },
  },
  {
    supportedContentTypes: [ContentTypes.HTML],
    write: (content) => {
      return editor.blocksToHTMLLossy(content);
    },
  },
];
/**
 * Extracts the bare media type from a header value, dropping any parameters
 * such as `; charset=utf-8`.
 *
 * Media types are case-insensitive and may carry optional whitespace around
 * the `;` separator, so the result is trimmed and lower-cased before it is
 * compared against the supported content types.
 *
 * @param value - Raw header value, e.g. `Text/Markdown; charset=utf-8`.
 * @returns The normalized media type, e.g. `text/markdown`.
 */
const normalizeContentType = (value: string): string => {
  const [mediaType = ''] = value.split(';');
  return mediaType.trim().toLowerCase();
};
export const convertHandler = async (
req: Request<object, Uint8Array | ErrorResponse, Buffer, object>,
res: Response<Uint8Array | string | object | ErrorResponse>,
res: Response<ConversionResponseBody>,
) => {
if (!req.body || req.body.length === 0) {
res.status(400).json({ error: 'Invalid request: missing content' });
return;
}
const contentType = (req.header('content-type') || 'text/markdown').split(
';',
)[0];
const accept = (req.header('accept') || 'application/vnd.yjs.doc').split(
';',
)[0];
const contentType = normalizeContentType(
req.header('content-type') || ContentTypes.Markdown,
);
const reader = readers.find((reader) =>
reader.supportedContentTypes.includes(contentType),
);
if (!reader) {
res.status(415).json({ error: 'Unsupported Content-Type' });
return;
}
const accept = normalizeContentType(req.header('accept') || ContentTypes.YJS);
const writer = writers.find((writer) =>
writer.supportedContentTypes.includes(accept),
);
if (!writer) {
res.status(406).json({ error: 'Unsupported format' });
return;
}
let blocks:
| PartialBlock<
@@ -44,63 +132,23 @@ export const convertHandler = async (
>[]
| null = null;
try {
// First, convert from the input format to blocks
// application/x-www-form-urlencoded is interpreted as Markdown for backward compatibility
if (
contentType === 'text/markdown' ||
contentType === 'application/x-www-form-urlencoded'
) {
blocks = await editor.tryParseMarkdownToBlocks(req.body.toString());
} else if (
contentType === 'application/vnd.yjs.doc' ||
contentType === 'application/octet-stream'
) {
try {
const ydoc = new Y.Doc();
Y.applyUpdate(ydoc, req.body);
blocks = editor.yDocToBlocks(ydoc, 'document-store') as PartialBlock[];
} catch (e) {
logger('Invalid Yjs content:', e);
res.status(400).json({ error: 'Invalid Yjs content' });
return;
}
} else {
res.status(415).json({ error: 'Unsupported Content-Type' });
try {
blocks = await reader.read(req.body);
} catch (e) {
logger('Invalid content:', e);
res.status(400).json({ error: 'Invalid content' });
return;
}
if (!blocks || blocks.length === 0) {
res.status(500).json({ error: 'No valid blocks were generated' });
return;
}
// Then, convert from blocks to the output format
if (accept === 'application/json') {
res.status(200).json(blocks);
} else {
const yDocument = editor.blocksToYDoc(blocks, 'document-store');
if (
accept === 'application/vnd.yjs.doc' ||
accept === 'application/octet-stream'
) {
res
.status(200)
.setHeader('content-type', 'application/octet-stream')
.send(Y.encodeStateAsUpdate(yDocument));
} else if (accept === 'text/markdown') {
res
.status(200)
.setHeader('content-type', 'text/markdown')
.send(await editor.blocksToMarkdownLossy(blocks));
} else if (accept === 'text/html') {
res
.status(200)
.setHeader('content-type', 'text/html')
.send(await editor.blocksToHTMLLossy(blocks));
} else {
res.status(406).json({ error: 'Unsupported format' });
}
}
res
.status(200)
.setHeader('content-type', accept)
.send(await writer.write(blocks));
} catch (e) {
logger('conversion failed:', e);
res.status(500).json({ error: 'An error occurred' });