Commit aad9c6da, authored by Miklos Vajna

sw HTML export: add a filter option to produce XHTML

Add initial support for writing XHTML markup as part of the HTML filter.
This already emits valid XHTML for hello world documents.

Times for 100 hello world inputs: 16032 -> 9957 ms is spent in ODT-load
+ export + close (62% of original).

Change-Id: I51a0a20985958fbc817c196d3a966e55dcb3f13f
Reviewed-on: https://gerrit.libreoffice.org/46567
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
üst 15d134b4
......@@ -24,6 +24,9 @@
// Doctype payload for plain HTML output: the public identifier of the
// HTML 4.0 Transitional DTD (no system identifier / URL).
#define OOO_STRING_SVTOOLS_HTML_doctype40 \
"HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\""
// Doctype payload for XHTML output: the public identifier of the
// "XHTML 1.1 plus MathML 2.0" DTD, followed by its system identifier (DTD URL).
// The two adjacent string literals are concatenated by the compiler.
#define OOO_STRING_SVTOOLS_XHTML_doctype11 \
"html PUBLIC \"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN\" " \
"\"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd\""
// these are only switched on
#define OOO_STRING_SVTOOLS_HTML_area "area"
......@@ -507,6 +510,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_format "format"
#define OOO_STRING_SVTOOLS_HTML_O_frame "frame"
#define OOO_STRING_SVTOOLS_HTML_O_lang "lang"
// Name of the language attribute when writing XHTML ("xml:lang" rather than
// the plain "lang" used for HTML).
#define OOO_STRING_SVTOOLS_XHTML_O_lang "xml:lang"
#define OOO_STRING_SVTOOLS_HTML_O_method "method"
#define OOO_STRING_SVTOOLS_HTML_O_rel "rel"
#define OOO_STRING_SVTOOLS_HTML_O_rev "rev"
......
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<title>Title of document</title>
</head>
<body>hello world</body>
</html>
......@@ -48,6 +48,8 @@ private:
setFilterOptions("SkipImages");
else if (getTestName().indexOf("EmbedImages") != -1)
setFilterOptions("EmbedImages");
else if (getTestName().indexOf("XHTML") != -1)
setFilterOptions("XHTML");
else
setFilterOptions("");
......@@ -306,6 +308,21 @@ DECLARE_HTMLEXPORT_TEST(testEmbedImagesEnabled, "textAndImage.docx")
assertXPath(pDoc, "/html/body/p/img", "src", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAACXBIWXMAAA3ZAAAN2QHmodeGAAAFyUlEQVR4nO1Za2gcVRQ+szuzO9ndZtfEktY2tVorVdNKxRpQweqPmh/B1w8JhCL4R4RYiPhCfKFEfwX/2VYULJIqJFDqg4KC1EaoEoyGhFJClBgbN+a9z9ndeXnOzJ3szCbbQuY2i5ADh5udmXvP953z3cdMRNM04f9sYq0B+LVNArW2TQJXM0EQdmKTQL+Mi4V2PWJwJ4CgBUkKdImi9F48HpGikTptfj4l19fLX2cyxeeQyALPeNwJxOOx/r17djz6+afdsX233QBg5CCXWYS3es62n/js4mXkdyeSmOMVjysBURQ7djVvbRv87p2oLJUAtHmLQDSUg963W8MhMSscPzXWh48e4RaT10BkKJkXens6orKYRvBZC7zlut2+231r6MOTI4exCgmswjKPmFwJFIvFu+5pqbczr7vB22QkMwd7d8vK2Hj+AD5+gUdMrgRCUnD+yt9T8ZubGsrAHSKMTHK2SDGTvGJyJaBp2g/9Z4abH7j7YMjJupvA0Mgi5BVDxUcneMXkSiCTVV/75ItLTx1qCTR2PpbwEJiYTMHjXamcUjSfMTmeIH0TwAm5H5s96JPou3N57f3n3xh99URfuL79IVmOR1UYGlXUL89phqrCabuLcAjbRvQCcjlfMwII5PCWaPib1ntv0S4O/Skn4jHhjn27FMPQ4d/ZJePjgWzJNExBDCW0gwciWp0ceLpQLHaMjM3UNTeZhcU0CLGI8FE2b75SEwJhSWx/ueuR6JvdD8Kx18+Yl/4oCN8PHEP9KyibAoC50krO76Hf/4EjnUkYHYDY+F8A93XCszhUbQiQSUHcsPQFJINz00TX5hhodCJS2WpLIAbBDARAkMN+o/OYxEYeQS0y0Gz3rQTtJmQs+0ftMp8EDFsm+qLdmvhbn7cBr8o+k5Se5oOcGZ8K6E4FBHb+8ejfS0jLcYBdNn4SInBm0CuhSiIkMc7fEPxLiIDqKgMZYgQqdA86F7Brmf8KmCQhzV6BKLvWHGBy2QDzEKAtEpsgu04eoMvsNqVRY62OO6hhXbXAsieIhL60AbDLZhFA3AQa6w8xsN9hybegy+wZAkspzaAvk2OfTFhaIcds4z+SiQiEABLwrejb0ZvQ8VBvESJiDkgigbPQIjGLfoU9U1NzZCKBnW0iEkEPu+6RUWqJCAGuQ4+CXaXIxsJdbQSSdE27iyMTEjG+jVsSIrBEziFCz1IViOBOzYD6C8OgPvEwSIPDYDYkvJJK4nx+qReMvg9W+oOEo2UVEEbGAcYmrMwovgjQZEQZlRgJhwBVIs6c5EQZp2yHGDHygK7Dzz/9Bvtbj8I2/Ft5EVv34HO4PZw97w24rdFqCvcfhZIQgEJOgSd9EUDwjoQINA1/I5QrILN7QZc7RtnWMjk4zn634QGtzT04ZhoQYEDH2gYD5esBAUo5xYz7Ab5CAGx9Oxp3iDQwAtJV+jp9qlo2b7cpnPYNXri0Ygex+r53OJKQiYPReypJiDROmqQDC8khAXYVnNXIWSeFa4F3E1hKewmwQUIYt+D39dLaB9ggJRyQCBQYGZoLtKzS8uosq4Eq46wJwiGwnFmzj8rj3dizE7MJXQS7EhQ+BTaRm8CeHzSZSVZO9t0VWWVZtr5UIUDzz1jZ0XkQsBDZWdFwcOfokAV79yVJUTVirn4kLQnKc8lLoHoFqJI70BcwDj2lrZdI1cPcGrKir8oRKFeAwEcZKfJVExJXKGuiVyFwO/o0+gx6ipby9UjqmqdRlhmSVRGDpKEsF2dnpi/NdPxIVfZdTlukiECw4hYBpaQYbBzn0MifgCeqnSF3EIVViGQwfbIfSl/9CCoDA5PT1vjJ3lOw/fQ5KwmmqtESav39
K7rKXF/vhPb9PoBxVUaiZ2YBBtGdsxQZZfmX5AK0oFtAwc76FPbj8nLM5dMiy14aiXwLthyc5dZgm9UUjzhrGddvo4yIDtfzHbLCNv9LWWvbJFBr2yRQa/sP25LGjrtpN08AAAAASUVORK5CYII=");
}
DECLARE_HTMLEXPORT_TEST(testXHTML, "hello.html")
{
    // Part 1: the raw export must begin with an XHTML doctype.
    SvStream* pExportStream = maTempFile.GetStream(StreamMode::READ);
    CPPUNIT_ASSERT(pExportStream);

    const OString aDoctypePrefix("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML");
    // Read exactly as many bytes from the start of the stream as the prefix is long.
    const OString aStreamStart(
        read_uInt8s_ToOString(*pExportStream, aDoctypePrefix.getLength()));
    // Without the fix the export started with the HTML doctype instead.
    CPPUNIT_ASSERT_EQUAL(aDoctypePrefix, aStreamStart);

    // Part 2: the language on <body> must be written as xml:lang.
    htmlDocPtr pDoc = parseHtml(maTempFile);
    CPPUNIT_ASSERT(pDoc);
    // Without the fix the attribute was named "lang".
    assertXPath(pDoc, "/html/body", "xml:lang", "en-US");
}
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
......@@ -182,6 +182,8 @@ void SwHTMLWriter::SetupFilterOptions(SfxMedium& rMedium)
{
mbEmbedImages = true;
}
else if (sFilterOptions == "XHTML")
mbXHTML = true;
}
ErrCode SwHTMLWriter::WriteStream()
......@@ -931,7 +933,10 @@ const SwPageDesc *SwHTMLWriter::MakeHeader( sal_uInt16 &rHeaderAttrs )
OStringBuffer sOut;
if (!mbSkipHeaderFooter)
{
sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40);
if (mbXHTML)
sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_XHTML_doctype11);
else
sOut.append(OOO_STRING_SVTOOLS_HTML_doctype " " OOO_STRING_SVTOOLS_HTML_doctype40);
HTMLOutFuncs::Out_AsciiTag( Strm(), sOut.makeStringAndClear().getStr() );
// build prelude
......@@ -1276,8 +1281,12 @@ void SwHTMLWriter::OutLanguage( LanguageType nLang )
if( LANGUAGE_DONTKNOW != nLang )
{
OStringBuffer sOut;
sOut.append(' ').append(OOO_STRING_SVTOOLS_HTML_O_lang)
.append("=\"");
sOut.append(' ');
if (mbXHTML)
sOut.append(OOO_STRING_SVTOOLS_XHTML_O_lang);
else
sOut.append(OOO_STRING_SVTOOLS_HTML_O_lang);
sOut.append("=\"");
Strm().WriteCharPtr( sOut.makeStringAndClear().getStr() );
HTMLOutFuncs::Out_String( Strm(), LanguageTag::convertToBcp47(nLang),
m_eDestEnc, &m_aNonConvertableCharacters ).WriteChar( '"' );
......
......@@ -387,6 +387,8 @@ public:
/// If HTML header and footer should be written as well, or just the content itself.
bool mbSkipHeaderFooter : 1;
bool mbEmbedImages : 1;
/// If XHTML markup should be written instead of HTML.
bool mbXHTML = false;
#define sCSS2_P_CLASS_leaders "leaders"
bool m_bCfgPrintLayout : 1; // PrintLayout option for TOC dot leaders
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment