Bug 562590 - Make incomplete byte sequences near HTML EOF emit a REPLACEMENT CHARACTER.
MozReview-Commit-ID: 6NF4rMWxyVu
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -829,25 +829,16 @@ nsHtml5StreamParser::WriteStreamBytes(co
NS_ASSERTION(IsParserThread(), "Wrong thread!");
// mLastBuffer should always point to a buffer of the size
// NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
if (!mLastBuffer) {
NS_WARNING("mLastBuffer should not be null!");
MarkAsBroken(NS_ERROR_NULL_POINTER);
return NS_ERROR_NULL_POINTER;
}
- if (mLastBuffer->getEnd() == NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
- RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
- nsHtml5OwningUTF16Buffer::FalliblyCreate(
- NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
- if (!newBuf) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- mLastBuffer = (mLastBuffer->next = newBuf.forget());
- }
size_t totalRead = 0;
auto src = MakeSpan(aFromSegment, aCount);
for (;;) {
auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
uint32_t result;
size_t read;
size_t written;
bool hadErrors;
@@ -857,22 +848,20 @@ nsHtml5StreamParser::WriteStreamBytes(co
src = src.From(read);
totalRead += read;
mLastBuffer->AdvanceEnd(written);
if (result == kOutputFull) {
RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
nsHtml5OwningUTF16Buffer::FalliblyCreate(
NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
if (!newBuf) {
+ MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
return NS_ERROR_OUT_OF_MEMORY;
}
mLastBuffer = (mLastBuffer->next = newBuf.forget());
- // All input may have been consumed if there is a pending surrogate pair
- // that doesn't fit in the output buffer. Loop back to push a zero-length
- // input to the decoder in that case.
} else {
MOZ_ASSERT(totalRead == aCount,
"The Unicode decoder consumed the wrong number of bytes.");
*aWriteCount = totalRead;
return NS_OK;
}
}
}
@@ -1051,16 +1040,53 @@ nsHtml5StreamParser::DoStopRequest()
if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) {
MarkAsBroken(rv);
return;
}
} else if (mFeedChardet) {
mChardet->Done();
}
+ MOZ_ASSERT(mUnicodeDecoder, "Should have a decoder after finalizing sniffing.");
+
+ // mLastBuffer should always point to a buffer of the size
+ // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
+ if (!mLastBuffer) {
+ NS_WARNING("mLastBuffer should not be null!");
+ MarkAsBroken(NS_ERROR_NULL_POINTER);
+ return;
+ }
+
+ Span<uint8_t> src; // empty span
+ for (;;) {
+ auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+ uint32_t result;
+ size_t read;
+ size_t written;
+ bool hadErrors;
+ Tie(result, read, written, hadErrors) =
+ mUnicodeDecoder->DecodeToUTF16(src, dst, true);
+ Unused << hadErrors;
+ MOZ_ASSERT(read == 0, "How come an empty span was read from?");
+ mLastBuffer->AdvanceEnd(written);
+ if (result == kOutputFull) {
+ RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
+ nsHtml5OwningUTF16Buffer::FalliblyCreate(
+ NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+ if (!newBuf) {
+ MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+ return;
+ }
+ mLastBuffer = (mLastBuffer->next = newBuf.forget());
+ } else {
+ break;
+ }
+ }
+
+
if (IsTerminatedOrInterrupted()) {
return;
}
ParseAvailableData();
}
class nsHtml5RequestStopper : public Runnable
--- a/testing/web-platform/meta/MANIFEST.json
+++ b/testing/web-platform/meta/MANIFEST.json
@@ -9076,16 +9076,64 @@
[
"/css/css-transitions-1/reference/transition-test-ref.html",
"=="
]
],
{}
]
],
+ "encoding/eof-shift_jis.html": [
+ [
+ "/encoding/eof-shift_jis.html",
+ [
+ [
+ "/encoding/eof-shift_jis-ref.html",
+ "=="
+ ]
+ ],
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-one.html": [
+ [
+ "/encoding/eof-utf-8-one.html",
+ [
+ [
+ "/encoding/eof-utf-8-one-ref.html",
+ "=="
+ ]
+ ],
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-three.html": [
+ [
+ "/encoding/eof-utf-8-three.html",
+ [
+ [
+ "/encoding/eof-utf-8-three-ref.html",
+ "=="
+ ]
+ ],
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-two.html": [
+ [
+ "/encoding/eof-utf-8-two.html",
+ [
+ [
+ "/encoding/eof-utf-8-two-ref.html",
+ "=="
+ ]
+ ],
+ {}
+ ]
+ ],
"html/dom/elements/global-attributes/dir_auto-EN-L.html": [
[
"/html/dom/elements/global-attributes/dir_auto-EN-L.html",
[
[
"/html/dom/elements/global-attributes/dir_auto-EN-L-ref.html",
"=="
]
@@ -46288,16 +46336,36 @@
{}
]
],
"encoding/OWNERS": [
[
{}
]
],
+ "encoding/eof-shift_jis-ref.html": [
+ [
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-one-ref.html": [
+ [
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-three-ref.html": [
+ [
+ {}
+ ]
+ ],
+ "encoding/eof-utf-8-two-ref.html": [
+ [
+ {}
+ ]
+ ],
"encoding/resources/encodings.js": [
[
{}
]
],
"encoding/resources/single-byte-raw.py": [
[
{}
@@ -173643,16 +173711,48 @@
"encoding/api-surrogates-utf8.html": [
"e44be4c30e9c65a4b51972efab2e161f166d58a5",
"testharness"
],
"encoding/big5-encoder.html": [
"b9635c43ce159e1961106b039dce0e3d04fade34",
"testharness"
],
+ "encoding/eof-shift_jis-ref.html": [
+ "55ac2be8c2cce3bae0ea1e61f5c330c38adc1e9e",
+ "support"
+ ],
+ "encoding/eof-shift_jis.html": [
+ "c5f6bd10724c2186f9c3347a8eca29627c8fcb5d",
+ "reftest"
+ ],
+ "encoding/eof-utf-8-one-ref.html": [
+ "88e83397cb0e375bc9c84ddedfe60aa5cb11a667",
+ "support"
+ ],
+ "encoding/eof-utf-8-one.html": [
+ "8f89ca912cc4d01dc9cb72e027aef26e1d0cb2a6",
+ "reftest"
+ ],
+ "encoding/eof-utf-8-three-ref.html": [
+ "48dbb873550d0fb33c45a4ab4fff03654b1732b2",
+ "support"
+ ],
+ "encoding/eof-utf-8-three.html": [
+ "b04b8002836ea0f476b65270bb2b5797ffec5fdd",
+ "reftest"
+ ],
+ "encoding/eof-utf-8-two-ref.html": [
+ "ef39f0e0af97a88ceaf652a9df0b2cd07719bff5",
+ "support"
+ ],
+ "encoding/eof-utf-8-two.html": [
+ "dd855efd3ea7bd865c35c105506473fb7731bc36",
+ "reftest"
+ ],
"encoding/gb18030-encoder.html": [
"6f091a64de33492bd17ecbd37f5db0fff9af499a",
"testharness"
],
"encoding/gbk-encoder.html": [
"30bdfc96ffaff0277ceca69aad43d82d5ac691b6",
"testharness"
],
@@ -178656,17 +178756,17 @@
"ac9d401368b75e00adbdf80ee42dd8dce1e48e13",
"testharness"
],
"html/browsers/the-window-object/window-indexed-properties.html": [
"22d5cb06bfc4724d27f565b8ffa2280bf2e8538b",
"testharness"
],
"html/browsers/the-window-object/window-named-properties.html": [
- "03bab12397e43003c6a4d768d2faa580501400bf",
+ "21bb2b7a30381decf8b55152ba33cd723b67b8d5",
"testharness"
],
"html/browsers/the-window-object/window-open-noopener.html": [
"2e20bfcd1dfe9bee00a9747b87cdaf42004d6415",
"testharness"
],
"html/browsers/the-window-object/window-properties.html": [
"ee0ade0a8de422597c362d15cf4a9dd446e4af00",
@@ -203100,17 +203200,17 @@
"722fea5d23733d822faf5717f66f2ff2cccb62a4",
"support"
],
"old-tests/submission/Opera/script_scheduling/scripts/include-10.js": [
"8b70b5a43ab3f1efd9755bf95f9a2713bbe588ce",
"support"
],
"old-tests/submission/Opera/script_scheduling/scripts/include-11.js": [
- "af092f1827bebd1e69f3b914049ad4febfcc8dc5",
+ "f26e65bc15505040371e3026cd09c8eb0df0a1c4",
"support"
],
"old-tests/submission/Opera/script_scheduling/scripts/include-12.js": [
"7dcd929b6b7dd65553f87ec8b0fbf0ec84c335b2",
"support"
],
"old-tests/submission/Opera/script_scheduling/scripts/include-2.js": [
"3a3c18956bb0ccf8c13b301cd3c6f0a6e474ce75",
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-shift_jis-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+One-byte truncated sequence:�
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-shift_jis.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+<link rel=match href=/encoding/eof-shift_jis-ref.html>
+One-byte truncated sequence:ƒ
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-one-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+One-byte truncated sequence:�
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-one.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-one-ref.html">
+One-byte truncated sequence:ð
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-three-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+Three-byte truncated sequence:�
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-three.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-three-ref.html">
+Three-byte truncated sequence:ðŸ’
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-two-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+Two-byte truncated sequence:�
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/eof-utf-8-two.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-two-ref.html">
+Two-byte truncated sequence:ðŸ
\ No newline at end of file