Commit 84400eae, authored by Jan Holesovsky

tdf#88821: Implement support for <meta charset="..."> for HTML import.

The editengine HTML import was not handling it at all, and consequently not
setting the right encoding when importing HTML in Calc.

Change-Id: I3ca3dd20f36cfb579fb7ae4cd3da63a69d97601e
üst b297f7bb
......@@ -422,6 +422,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_alt "alt"
#define OOO_STRING_SVTOOLS_HTML_O_axis "axis"
#define OOO_STRING_SVTOOLS_HTML_O_char "char"
#define OOO_STRING_SVTOOLS_HTML_O_charset "charset"
#define OOO_STRING_SVTOOLS_HTML_O_class "class"
#define OOO_STRING_SVTOOLS_HTML_O_code "code"
#define OOO_STRING_SVTOOLS_HTML_O_codetype "codetype"
......
......@@ -308,6 +308,7 @@ HTML_OPTION_STRING_START = HTML_OPTION_BOOL_END,
HTML_O_ALT,
HTML_O_AXIS,
HTML_O_CHAR, // HTML3 Table Model Draft
HTML_O_CHARSET,
HTML_O_CLASS,
HTML_O_CODE, // HotJava
HTML_O_CODETYPE,
......
......@@ -88,6 +88,7 @@ public:
// void testTdf40110();
void testTdf98657();
void testTdf88821();
void testTdf88821_2();
CPPUNIT_TEST_SUITE(ScFiltersTest);
CPPUNIT_TEST(testTdf64229);
......@@ -98,6 +99,7 @@ public:
// CPPUNIT_TEST(testTdf40110);
CPPUNIT_TEST(testTdf98657);
CPPUNIT_TEST(testTdf88821);
CPPUNIT_TEST(testTdf88821_2);
CPPUNIT_TEST_SUITE_END();
private:
uno::Reference<uno::XInterface> m_xCalcComponent;
......@@ -256,6 +258,17 @@ void ScFiltersTest::testTdf88821()
xDocSh->DoClose();
}
void ScFiltersTest::testTdf88821_2()
{
ScDocShellRef xDocSh = loadDoc("tdf88821-2.", FORMAT_HTML);
ScDocument& rDoc = xDocSh->GetDocument();
// A2 should be 'ABCabcČŠŽčšž', not 'ABCabcČŠŽÄヘšž'
CPPUNIT_ASSERT_EQUAL(OStringToOUString("ABCabc\xC4\x8C\xC5\xA0\xC5\xBD\xC4\x8D\xC5\xA1\xC5\xBE", RTL_TEXTENCODING_UTF8), rDoc.GetString(0, 1, 0));
xDocSh->DoClose();
}
ScFiltersTest::ScFiltersTest()
: ScBootstrapFixture( "/sc/qa/unit/data" )
{
......
<html lang="en">
<head>
<meta charset="UTF-8">
</head>
<body>
<table border="1">
<tr>
<td>Text</td>
<td>Decimal</td>
<td>Date</td>
</tr>
<tr>
<td>ABCabcČŠŽčšž</td>
<td>10,50</td>
<td>30.1.2015</td>
</tr>
</table>
</body>
</html>
......@@ -576,6 +576,7 @@ static HTML_TokenEntry aHTMLOptionTab[] = {
{{OOO_STRING_SVTOOLS_HTML_O_alt}, HTML_O_ALT},
{{OOO_STRING_SVTOOLS_HTML_O_axis}, HTML_O_AXIS},
{{OOO_STRING_SVTOOLS_HTML_O_char}, HTML_O_CHAR}, // HTML 3 Table Model Draft
{{OOO_STRING_SVTOOLS_HTML_O_charset}, HTML_O_CHARSET},
{{OOO_STRING_SVTOOLS_HTML_O_class}, HTML_O_CLASS},
{{OOO_STRING_SVTOOLS_HTML_O_code}, HTML_O_CODE}, // HotJava
{{OOO_STRING_SVTOOLS_HTML_O_codetype}, HTML_O_CODETYPE},
......
......@@ -1928,6 +1928,10 @@ bool HTMLParser::ParseMetaOptionsImpl(
case HTML_O_CONTENT:
aContent = aOption.GetString();
break;
case HTML_O_CHARSET:
OString sValue(OUStringToOString(aOption.GetString(), RTL_TEXTENCODING_ASCII_US));
o_rEnc = GetExtendedCompatibilityTextEncoding(rtl_getTextEncodingFromMimeCharset(sValue.getStr()));
break;
}
}
......@@ -1942,7 +1946,6 @@ bool HTMLParser::ParseMetaOptionsImpl(
aContent = convertLineEnd(aContent, GetSystemLineEnd());
}
if ( bHTTPEquiv && i_pHTTPHeader )
{
// Netscape seems to just ignore a closing ", so we do too
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment