-
-
Notifications
You must be signed in to change notification settings - Fork 35.3k
loader: add experimental text import #62300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
66a1503
a392f74
2da51d1
7fd27d8
5c3addd
63428bb
4a5953c
f8de5d5
42428d9
4a29525
31778cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -8,6 +8,7 @@ const { | |||||||
| ObjectValues, | ||||||||
| } = primordials; | ||||||||
| const { validateString } = require('internal/validators'); | ||||||||
| const { getOptionValue } = require('internal/options'); | ||||||||
|
|
||||||||
| const { | ||||||||
| ERR_IMPORT_ATTRIBUTE_TYPE_INCOMPATIBLE, | ||||||||
|
|
@@ -32,6 +33,14 @@ const formatTypeMap = { | |||||||
| 'wasm': kImplicitTypeAttribute, // It's unclear whether the HTML spec will require an type attribute or not for Wasm; see https://github.com/WebAssembly/esm-integration/issues/42 | ||||||||
| }; | ||||||||
|
|
||||||||
| function getFormatType(format) { | ||||||||
| if (format === 'text' && getOptionValue('--experimental-import-text')) { | ||||||||
| return 'text'; | ||||||||
| } | ||||||||
|
|
||||||||
| return formatTypeMap[format]; | ||||||||
| } | ||||||||
GeoffreyBooth marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||
|
|
||||||||
| /** | ||||||||
| * The HTML spec disallows the default type to be explicitly specified | ||||||||
| * (for now); so `import './file.js'` is okay but | ||||||||
|
|
@@ -42,7 +51,6 @@ const supportedTypeAttributes = ArrayPrototypeFilter( | |||||||
| ObjectValues(formatTypeMap), | ||||||||
| (type) => type !== kImplicitTypeAttribute); | ||||||||
|
|
||||||||
|
|
||||||||
| /** | ||||||||
| * Test a module's import attributes. | ||||||||
| * @param {string} url The URL of the imported module, for error reporting. | ||||||||
|
|
@@ -60,7 +68,7 @@ function validateAttributes(url, format, | |||||||
| throw new ERR_IMPORT_ATTRIBUTE_UNSUPPORTED(keys[i], importAttributes[keys[i]], url); | ||||||||
| } | ||||||||
| } | ||||||||
| const validType = formatTypeMap[format]; | ||||||||
| const validType = getFormatType(format); | ||||||||
|
||||||||
| const validType = getFormatType(format); | |
| const validType = formatTypeMap[format]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
. . . and this change can be reverted . . .
| if (!ArrayPrototypeIncludes(supportedTypeAttributes, type) && | |
| !(type === 'text' && getOptionValue('--experimental-import-text'))) { | |
| if (!ArrayPrototypeIncludes(supportedTypeAttributes, type)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,6 +78,25 @@ const protocolHandlers = { | |
| 'node:'() { return 'builtin'; }, | ||
| }; | ||
|
|
||
| function getFormatFromImportAttributes(importAttributes) { | ||
| if ( | ||
| !importAttributes || | ||
| !ObjectPrototypeHasOwnProperty(importAttributes, 'type') || | ||
| typeof importAttributes.type !== 'string' | ||
| ) { | ||
| return undefined; | ||
| } | ||
|
|
||
| if ( | ||
| getOptionValue('--experimental-import-text') && | ||
| importAttributes.type === 'text' | ||
| ) { | ||
| return 'text'; | ||
| } | ||
|
|
||
| return undefined; | ||
| } | ||
|
||
|
|
||
| /** | ||
| * Determine whether the given ambiguous source contains CommonJS or ES module syntax. | ||
| * @param {string | Buffer | undefined} [source] | ||
|
|
@@ -238,10 +257,15 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE | |
|
|
||
| /** | ||
| * @param {URL} url | ||
| * @param {{parentURL: string}} context | ||
| * @param {{parentURL: string, importAttributes?: Record<string, string>}} context | ||
| * @returns {Promise<string> | string | undefined} only works when enabled | ||
| */ | ||
| function defaultGetFormatWithoutErrors(url, context) { | ||
| const format = getFormatFromImportAttributes(context?.importAttributes); | ||
| if (format !== undefined) { | ||
| return format; | ||
| } | ||
|
|
||
| const protocol = url.protocol; | ||
| if (!ObjectPrototypeHasOwnProperty(protocolHandlers, protocol)) { | ||
| return null; | ||
|
|
@@ -251,10 +275,15 @@ function defaultGetFormatWithoutErrors(url, context) { | |
|
|
||
| /** | ||
| * @param {URL} url | ||
| * @param {{parentURL: string}} context | ||
| * @param {{parentURL: string, importAttributes?: Record<string, string>}} context | ||
| * @returns {Promise<string> | string | undefined} only works when enabled | ||
| */ | ||
| function defaultGetFormat(url, context) { | ||
| const format = getFormatFromImportAttributes(context?.importAttributes); | ||
| if (format !== undefined) { | ||
| return format; | ||
| } | ||
|
|
||
| const protocol = url.protocol; | ||
| if (!ObjectPrototypeHasOwnProperty(protocolHandlers, protocol)) { | ||
| return null; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -642,3 +642,13 @@ translators.set('module-typescript', function(url, translateContext, parentURL) | |
| translateContext.source = stripTypeScriptModuleTypes(stringify(source), url); | ||
| return FunctionPrototypeCall(translators.get('module'), this, url, translateContext, parentURL); | ||
| }); | ||
|
|
||
| // Strategy for loading source as text. | ||
| translators.set('text', function textStrategy(url, translateContext) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like they were introduced in 35f92d9 and they were never gated, since it would fail earlier I see now they emitted a warning with |
||
| let { source } = translateContext; | ||
| assertBufferSource(source, true, 'load'); | ||
| source = stringify(source); | ||
| return new ModuleWrap(url, undefined, ['default'], function() { | ||
| this.setExport('default', source); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| // Flags: --expose-internals --experimental-import-text | ||
| 'use strict'; | ||
| require('../common'); | ||
|
|
||
| const assert = require('assert'); | ||
|
|
||
| const { validateAttributes } = require('internal/modules/esm/assert'); | ||
|
|
||
| const url = 'test://'; | ||
|
|
||
| assert.ok(validateAttributes(url, 'text', { type: 'text' })); | ||
|
|
||
| assert.throws(() => validateAttributes(url, 'text', {}), { | ||
| code: 'ERR_IMPORT_ATTRIBUTE_MISSING', | ||
| }); | ||
|
|
||
| assert.throws(() => validateAttributes(url, 'module', { type: 'text' }), { | ||
| code: 'ERR_IMPORT_ATTRIBUTE_TYPE_INCOMPATIBLE', | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| import '../common/index.mjs'; | ||
| import assert from 'assert'; | ||
|
|
||
| await assert.rejects( | ||
| import('../fixtures/file-to-read-without-bom.txt', { with: { type: 'text' } }), | ||
| { code: 'ERR_UNKNOWN_FILE_EXTENSION' }, | ||
| ); | ||
|
|
||
| await assert.rejects( | ||
| import('data:text/plain,hello%20world', { with: { type: 'text' } }), | ||
| { code: 'ERR_UNKNOWN_MODULE_FORMAT' }, | ||
| ); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| // Flags: --experimental-import-text | ||
| import '../common/index.mjs'; | ||
| import assert from 'assert'; | ||
|
|
||
| import staticText from '../fixtures/file-to-read-without-bom.txt' with { type: 'text' }; | ||
| import staticTextWithBOM from '../fixtures/file-to-read-with-bom.txt' with { type: 'text' }; | ||
|
|
||
| const expectedText = 'abc\ndef\nghi\n'; | ||
|
|
||
| assert.strictEqual(staticText, expectedText); | ||
| assert.strictEqual(staticTextWithBOM, expectedText); | ||
|
|
||
| const dynamicText = await import('../fixtures/file-to-read-without-bom.txt', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.strictEqual(dynamicText.default, expectedText); | ||
|
|
||
| const dataText = await import('data:text/plain,hello%20world', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.strictEqual(dataText.default, 'hello world'); | ||
|
|
||
| const dataJsAsText = await import('data:text/javascript,export{}', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.strictEqual(dataJsAsText.default, 'export{}'); | ||
|
|
||
| const dataInvalidUtf8 = await import('data:text/plain,%66%6f%80%6f', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.strictEqual(dataInvalidUtf8.default, 'fo\ufffdo'); | ||
|
|
||
| const jsAsText = await import('../fixtures/syntax/bad_syntax.js', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.match(jsAsText.default, /^var foo bar;/); | ||
|
|
||
| const jsonAsText = await import('../fixtures/invalid.json', { | ||
| with: { type: 'text' }, | ||
| }); | ||
| assert.match(jsonAsText.default, /"im broken"/); | ||
|
|
||
| await assert.rejects( | ||
| import('data:text/plain,hello%20world'), | ||
| { code: 'ERR_UNKNOWN_MODULE_FORMAT' }, | ||
| ); | ||
|
|
||
| await assert.rejects( | ||
| import('../fixtures/file-to-read-without-bom.txt'), | ||
| { code: 'ERR_UNKNOWN_FILE_EXTENSION' }, | ||
| ); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a test for what happens when you try to import a binary data file such as an I encourage Node.js to take a strict view here. The following code should throw an exception import text from "image.png" with { type: "text" };There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I'm not so sure. I would just allow any file to be read as text. You never know it might have a use case. And implementing a list of potential extensions which don't make sense could be a never ending process?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm really not sure why it should - if there's a text representation of a file, it should give it. If it's nonsense, that's the developer's problem to solve. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Replacement characters render the text form of a non-text file unusable and unrecoverable, so it's hard to think of a use case.
I don't suggest filtering by extension. I suggest throwing an exception if any non-text byte is encountered when reading the file.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which bytes are always, universally, non-text?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This loader unconditionally uses UTF-8 encoding, so presumably "non-text" means "not valid UTF-8", which means this amounts to a request to throw instead of substituting � U+FFFD when bytes which are not valid UTF-8 are encountered (i.e. passing
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but valid javascript strings can have lone surrogates, so i'm a bit confused why we'd want to do either one - meaning, throwing kills use cases, and silently substituting results in artificially well-formed utf8? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When reading files, it's always necessary to push them through a text decoder, except the unusual case where the file is stored on disk as UTF-16. There is no such thing as lone surrogates in UTF-8. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this map is never exported, I think it can have experimental formats? Then we won’t need to override it or check in so many places.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup, we can extend this directly, done