Commit 84400eae, authored by Jan Holesovsky

tdf#88821: Implement support for <meta charset="..."> for HTML import.

The editengine HTML import was not handling it at all, and consequently not
setting the right encoding when importing HTML in Calc.

Change-Id: I3ca3dd20f36cfb579fb7ae4cd3da63a69d97601e
parent b297f7bb
...@@ -422,6 +422,7 @@ ...@@ -422,6 +422,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_alt "alt" #define OOO_STRING_SVTOOLS_HTML_O_alt "alt"
#define OOO_STRING_SVTOOLS_HTML_O_axis "axis" #define OOO_STRING_SVTOOLS_HTML_O_axis "axis"
#define OOO_STRING_SVTOOLS_HTML_O_char "char" #define OOO_STRING_SVTOOLS_HTML_O_char "char"
#define OOO_STRING_SVTOOLS_HTML_O_charset "charset"
#define OOO_STRING_SVTOOLS_HTML_O_class "class" #define OOO_STRING_SVTOOLS_HTML_O_class "class"
#define OOO_STRING_SVTOOLS_HTML_O_code "code" #define OOO_STRING_SVTOOLS_HTML_O_code "code"
#define OOO_STRING_SVTOOLS_HTML_O_codetype "codetype" #define OOO_STRING_SVTOOLS_HTML_O_codetype "codetype"
......
...@@ -308,6 +308,7 @@ HTML_OPTION_STRING_START = HTML_OPTION_BOOL_END, ...@@ -308,6 +308,7 @@ HTML_OPTION_STRING_START = HTML_OPTION_BOOL_END,
HTML_O_ALT, HTML_O_ALT,
HTML_O_AXIS, HTML_O_AXIS,
HTML_O_CHAR, // HTML3 Table Model Draft HTML_O_CHAR, // HTML3 Table Model Draft
HTML_O_CHARSET,
HTML_O_CLASS, HTML_O_CLASS,
HTML_O_CODE, // HotJava HTML_O_CODE, // HotJava
HTML_O_CODETYPE, HTML_O_CODETYPE,
......
...@@ -88,6 +88,7 @@ public: ...@@ -88,6 +88,7 @@ public:
// void testTdf40110(); // void testTdf40110();
void testTdf98657(); void testTdf98657();
void testTdf88821(); void testTdf88821();
void testTdf88821_2();
CPPUNIT_TEST_SUITE(ScFiltersTest); CPPUNIT_TEST_SUITE(ScFiltersTest);
CPPUNIT_TEST(testTdf64229); CPPUNIT_TEST(testTdf64229);
...@@ -98,6 +99,7 @@ public: ...@@ -98,6 +99,7 @@ public:
// CPPUNIT_TEST(testTdf40110); // CPPUNIT_TEST(testTdf40110);
CPPUNIT_TEST(testTdf98657); CPPUNIT_TEST(testTdf98657);
CPPUNIT_TEST(testTdf88821); CPPUNIT_TEST(testTdf88821);
CPPUNIT_TEST(testTdf88821_2);
CPPUNIT_TEST_SUITE_END(); CPPUNIT_TEST_SUITE_END();
private: private:
uno::Reference<uno::XInterface> m_xCalcComponent; uno::Reference<uno::XInterface> m_xCalcComponent;
...@@ -256,6 +258,17 @@ void ScFiltersTest::testTdf88821() ...@@ -256,6 +258,17 @@ void ScFiltersTest::testTdf88821()
xDocSh->DoClose(); xDocSh->DoClose();
} }
void ScFiltersTest::testTdf88821_2()
{
ScDocShellRef xDocSh = loadDoc("tdf88821-2.", FORMAT_HTML);
ScDocument& rDoc = xDocSh->GetDocument();
// A2 should be 'ABCabcČŠŽčšž', not 'ABCabcČŠŽÄヘšž'
CPPUNIT_ASSERT_EQUAL(OStringToOUString("ABCabc\xC4\x8C\xC5\xA0\xC5\xBD\xC4\x8D\xC5\xA1\xC5\xBE", RTL_TEXTENCODING_UTF8), rDoc.GetString(0, 1, 0));
xDocSh->DoClose();
}
ScFiltersTest::ScFiltersTest() ScFiltersTest::ScFiltersTest()
: ScBootstrapFixture( "/sc/qa/unit/data" ) : ScBootstrapFixture( "/sc/qa/unit/data" )
{ {
......
<html lang="en">
<head>
<meta charset="UTF-8">
</head>
<body>
<table border="1">
<tr>
<td>Text</td>
<td>Decimal</td>
<td>Date</td>
</tr>
<tr>
<td>ABCabcČŠŽčšž</td>
<td>10,50</td>
<td>30.1.2015</td>
</tr>
</table>
</body>
</html>
...@@ -576,6 +576,7 @@ static HTML_TokenEntry aHTMLOptionTab[] = { ...@@ -576,6 +576,7 @@ static HTML_TokenEntry aHTMLOptionTab[] = {
{{OOO_STRING_SVTOOLS_HTML_O_alt}, HTML_O_ALT}, {{OOO_STRING_SVTOOLS_HTML_O_alt}, HTML_O_ALT},
{{OOO_STRING_SVTOOLS_HTML_O_axis}, HTML_O_AXIS}, {{OOO_STRING_SVTOOLS_HTML_O_axis}, HTML_O_AXIS},
{{OOO_STRING_SVTOOLS_HTML_O_char}, HTML_O_CHAR}, // HTML 3 Table Model Draft {{OOO_STRING_SVTOOLS_HTML_O_char}, HTML_O_CHAR}, // HTML 3 Table Model Draft
{{OOO_STRING_SVTOOLS_HTML_O_charset}, HTML_O_CHARSET},
{{OOO_STRING_SVTOOLS_HTML_O_class}, HTML_O_CLASS}, {{OOO_STRING_SVTOOLS_HTML_O_class}, HTML_O_CLASS},
{{OOO_STRING_SVTOOLS_HTML_O_code}, HTML_O_CODE}, // HotJava {{OOO_STRING_SVTOOLS_HTML_O_code}, HTML_O_CODE}, // HotJava
{{OOO_STRING_SVTOOLS_HTML_O_codetype}, HTML_O_CODETYPE}, {{OOO_STRING_SVTOOLS_HTML_O_codetype}, HTML_O_CODETYPE},
......
...@@ -1928,6 +1928,10 @@ bool HTMLParser::ParseMetaOptionsImpl( ...@@ -1928,6 +1928,10 @@ bool HTMLParser::ParseMetaOptionsImpl(
case HTML_O_CONTENT: case HTML_O_CONTENT:
aContent = aOption.GetString(); aContent = aOption.GetString();
break; break;
case HTML_O_CHARSET:
OString sValue(OUStringToOString(aOption.GetString(), RTL_TEXTENCODING_ASCII_US));
o_rEnc = GetExtendedCompatibilityTextEncoding(rtl_getTextEncodingFromMimeCharset(sValue.getStr()));
break;
} }
} }
...@@ -1942,7 +1946,6 @@ bool HTMLParser::ParseMetaOptionsImpl( ...@@ -1942,7 +1946,6 @@ bool HTMLParser::ParseMetaOptionsImpl(
aContent = convertLineEnd(aContent, GetSystemLineEnd()); aContent = convertLineEnd(aContent, GetSystemLineEnd());
} }
if ( bHTTPEquiv && i_pHTTPHeader ) if ( bHTTPEquiv && i_pHTTPHeader )
{ {
// Netscape seems to just ignore a closing ", so we do too // Netscape seems to just ignore a closing ", so we do too
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment