Kaydet (Commit) 25caea6f authored tarafından Gert van Valkenhoef's avatar Gert van Valkenhoef Kaydeden (comit) Caolán McNamara

Add C++ HelpIndexer

üst 15704e63
tr l10ntools : BERKELEYDB:berkeleydb EXPAT:expat LIBXSLT:libxslt LUCENE:lucene sal NULL
tr l10ntools : BERKELEYDB:berkeleydb EXPAT:expat LIBXSLT:libxslt sal NULL
tr l10ntools usr1 - all tr_mkout NULL
tr l10ntools\inc nmake - all tr_inc NULL
tr l10ntools\source nmake - all tr_src tr_inc NULL
......
......@@ -26,12 +26,14 @@ mkdir: %_DEST%\bin\help\com\sun\star\help
..\%__SRC%\bin\txtconv %_DEST%\bin\txtconv
..\%__SRC%\bin\ulfconv %_DEST%\bin\ulfconv
..\%__SRC%\class\FCFGMerge.jar %_DEST%\bin\FCFGMerge.jar
..\%__SRC%\class\HelpIndexerTool.jar %_DEST%\bin\HelpIndexerTool.jar
..\%__SRC%\bin\HelpLinker %_DEST%\bin\HelpLinker
..\%__SRC%\bin\HelpCompiler %_DEST%\bin\HelpCompiler
..\%__SRC%\bin\HelpCompiler.exe %_DEST%\bin\HelpCompiler.exe
..\%__SRC%\bin\HelpLinker %_DEST%\bin\HelpLinker
..\%__SRC%\bin\HelpLinker.exe %_DEST%\bin\HelpLinker.exe
..\%__SRC%\bin\HelpLinker* %_DEST%\bin
..\%__SRC%\bin\HelpIndexer %_DEST%\bin\HelpIndexer
..\%__SRC%\bin\HelpIndexer.exe %_DEST%\bin\HelpIndexer.exe
..\%__SRC%\bin\HelpIndexer* %_DEST%\bin
..\scripts\localize %_DEST%\bin\localize
..\scripts\fast_merge.pl %_DEST%\bin\fast_merge.pl
......
#include <CLucene/StdHeader.h>
#include <CLucene.h>
#ifdef TODO
#include <CLucene/analysis/LanguageBasedAnalyzer.h>
#endif
#include <unistd.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <string>
#include <iostream>
#include <algorithm>
#include <set>
// NOTE: this file assumes that CLucene's TCHAR is wchar_t; the wide literals and std::wstring values handed to CLucene rely on it.
using namespace lucene::document;
/**
 * Builds a CLucene index for the help files of one help module.
 *
 * File names are collected from a caption directory and a content directory;
 * every distinct name becomes one document with "path", "caption" and
 * "content" fields.
 */
class HelpIndexer {
private:
// Language code of the help files; only consulted when TODO is defined.
std::string d_lang;
// Module name; becomes part of the "#HLP#<module>/<file>" path field.
std::string d_module;
// Directory holding the caption files.
std::string d_captionDir;
// Directory holding the content files.
std::string d_contentDir;
// Directory handed to the index writer as the index location.
std::string d_indexDir;
// Error text; empty until a directory scan records a problem.
std::string d_error;
// Names (without directory) of the regular files found while scanning.
// A set is used, so a name present in both directories is kept once.
std::set<std::string> d_files;
public:
/**
* @param lang Help files language.
* @param module The module of the helpfiles.
* @param captionDir The directory to scan for caption files.
* @param contentDir The directory to scan for content files.
* @param indexDir The directory to write the index to.
*/
HelpIndexer(std::string const &lang, std::string const &module,
std::string const &captionDir, std::string const &contentDir,
std::string const &indexDir);
/**
* Run the indexer: scan both directories, then add one document per
* file name to the index and optimize it.
* @return true if index successfully generated.
*/
bool indexDocuments();
/**
* Get the error string (empty if no error occurred).
* @return Reference to the stored message; valid as long as this object.
*/
std::string const & getErrorMessage();
private:
/**
* Scan the content directory and then the caption directory for help files.
* @return false as soon as one of the two scans reports failure.
*/
bool scanForFiles();
/**
* Add the names of the regular files in the given directory to the set
* of files to index.
* @param path Directory to scan.
*/
bool scanForFiles(std::string const &path);
/**
* Fill the Document with information on the given help file.
* @param fileName File name relative to the caption and content directories.
* @param doc Document that receives the "path", "caption" and "content" fields.
*/
bool helpDocument(std::string const & fileName, Document *doc);
/**
* Create a reader for the given file, or a reader over an empty string
* when the file is not readable.
* @return A reader allocated with new.
*/
lucene::util::Reader *helpFileReader(std::string const & path);
/**
* Widen a narrow string to a wide string, one wide element per char;
* no character-set conversion is performed.
*/
std::wstring string2wstring(std::string const &source);
};
/**
 * Remember the indexing parameters. Nothing is scanned or written until
 * indexDocuments() is called; the error text and the file set start empty.
 */
HelpIndexer::HelpIndexer(std::string const &lang, std::string const &module,
        std::string const &captionDir, std::string const &contentDir,
        std::string const &indexDir)
    : d_lang(lang),
      d_module(module),
      d_captionDir(captionDir),
      d_contentDir(contentDir),
      d_indexDir(indexDir),
      d_error(),
      d_files()
{
}
bool HelpIndexer::indexDocuments() {
if (!scanForFiles()) {
return false;
}
#ifdef TODO
// Construct the analyzer appropriate for the given language
lucene::analysis::Analyzer *analyzer = (
d_lang.compare("ja") == 0 ?
(lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
#else
lucene::analysis::Analyzer *analyzer = (
(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
#endif
lucene::index::IndexWriter writer(d_indexDir.c_str(), analyzer, true);
// Index the identified help files
Document doc;
for (std::set<std::string>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
doc.clear();
if (!helpDocument(*i, &doc)) {
delete analyzer;
return false;
}
writer.addDocument(&doc);
}
// Optimize the index
writer.optimize();
delete analyzer;
return true;
}
std::string const & HelpIndexer::getErrorMessage() {
return d_error;
}
/**
 * Collect the help file names from both source directories.
 *
 * @return true only if both scans report success.
 */
bool HelpIndexer::scanForFiles() {
    // Content first, then captions; && skips the second scan if the first fails.
    return scanForFiles(d_contentDir) && scanForFiles(d_captionDir);
}
bool HelpIndexer::scanForFiles(std::string const & path) {
DIR *dir = opendir(path.c_str());
if (dir == 0) {
d_error = "Error reading directory " + path + strerror(errno);
return true;
}
struct dirent *ent;
struct stat info;
while ((ent = readdir(dir)) != 0) {
if (stat((path + "/" + ent->d_name).c_str(), &info) == 0 && S_ISREG(info.st_mode)) {
d_files.insert(ent->d_name);
}
}
closedir(dir);
return true;
}
bool HelpIndexer::helpDocument(std::string const & fileName, Document *doc) {
// Add the help path as an indexed, untokenized field.
std::wstring path(L"#HLP#" + string2wstring(d_module) + L"/" + string2wstring(fileName));
doc->add(*new Field(_T("path"), path.c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
// Add the caption as a field.
std::string captionPath = d_captionDir + "/" + fileName;
doc->add(*new Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
// Add the content as a field.
std::string contentPath = d_contentDir + "/" + fileName;
doc->add(*new Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
return true;
}
/**
 * Create a reader for one help file.
 *
 * @param path File to read.
 * @return A new FileReader opened with "UTF-8" if the file is readable,
 *         otherwise a new StringReader over an empty string.
 */
lucene::util::Reader *HelpIndexer::helpFileReader(std::string const & path) {
    bool const readable = (access(path.c_str(), R_OK) == 0);
    if (!readable) {
        // An unreadable or missing file contributes an empty field.
        return new lucene::util::StringReader(L"");
    }
    return new lucene::util::FileReader(path.c_str(), "UTF-8");
}
/**
 * Widen a narrow string to a wide string, one wide element per input byte.
 *
 * Each byte is widened through unsigned char, so bytes >= 0x80 map to
 * 0x80..0xFF instead of being sign-extended into invalid wide values on
 * platforms where char is signed. ASCII input is unchanged. No multi-byte
 * decoding is done, so this is only exact for ASCII/Latin-1 input.
 *
 * @param source Narrow string to widen.
 * @return Wide string of the same length as source.
 */
std::wstring HelpIndexer::string2wstring(std::string const &source) {
    std::wstring target;
    target.reserve(source.length());
    for (std::string::const_iterator it = source.begin(); it != source.end(); ++it) {
        target.push_back(static_cast<wchar_t>(static_cast<unsigned char>(*it)));
    }
    return target;
}
/**
 * Command-line entry point.
 *
 * Expects -lang, -mod, -srcdir and -zipdir, each followed by a value.
 * Captions are read from <srcdir>/caption, contents from <srcdir>/content,
 * and the index is written to <zipdir>/<module>.idxl.
 *
 * @return 0 on success, 1 on bad or missing arguments, 2 if indexing failed.
 */
int main(int argc, char **argv) {
    const std::string pLang("-lang");
    const std::string pModule("-mod");
    const std::string pOutDir("-zipdir");
    const std::string pSrcDir("-srcdir");

    std::string lang;
    std::string module;
    std::string srcDir;
    std::string outDir;

    bool error = false;
    for (int idx = 1; idx < argc; ++idx) {
        // Map the option name to the variable that receives its value.
        std::string *target = 0;
        if (pLang.compare(argv[idx]) == 0) {
            target = &lang;
        } else if (pModule.compare(argv[idx]) == 0) {
            target = &module;
        } else if (pOutDir.compare(argv[idx]) == 0) {
            target = &outDir;
        } else if (pSrcDir.compare(argv[idx]) == 0) {
            target = &srcDir;
        }

        // Unknown option, or a known option without a following value.
        if (target == 0 || idx + 1 >= argc) {
            error = true;
            continue;
        }

        // Consume the value so it is not parsed as an option itself.
        *target = argv[++idx];
    }

    if (error) {
        std::cerr << "Error parsing command-line arguments" << std::endl;
    }

    const bool incomplete = lang.empty() || module.empty() || srcDir.empty() || outDir.empty();
    if (error || incomplete) {
        std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -srcdir SourceDir -zipdir OutputDir" << std::endl;
        return 1;
    }

    std::string captionDir(srcDir + "/caption");
    std::string contentDir(srcDir + "/content");
    std::string indexDir(outDir + "/" + module + ".idxl");

    HelpIndexer indexer(lang, module, captionDir, contentDir, indexDir);
    if (indexer.indexDocuments()) {
        return 0;
    }

    std::cerr << indexer.getErrorMessage() << std::endl;
    return 2;
}
......@@ -60,8 +60,10 @@ SLOFILES=\
EXCEPTIONSFILES=\
$(OBJ)$/HelpLinker.obj \
$(OBJ)$/HelpCompiler.obj \
$(OBJ)$/helpindexer.obj \
$(SLO)$/HelpLinker.obj \
$(SLO)$/HelpCompiler.obj
.IF "$(OS)" == "MACOSX" && "$(CPU)" == "P" && "$(COM)" == "GCC"
# There appears to be a GCC 4.0.1 optimization error causing _file:good() to
# report true right before the call to writeOut at HelpLinker.cxx:1.12 l. 954
......@@ -72,6 +74,9 @@ NOOPTFILES=\
$(SLO)$/HelpLinker.obj
.ENDIF
PKGCONFIG_MODULES=libclucene-core
.INCLUDE : pkg_config.mk
APP1TARGET= $(TARGET)
APP1OBJS=\
$(OBJ)$/HelpLinker.obj \
......@@ -79,6 +84,12 @@ APP1OBJS=\
APP1RPATH = NONE
APP1STDLIBS+=$(SALLIB) $(BERKELEYLIB) $(XSLTLIB) $(EXPATASCII3RDLIB)
APP2TARGET=HelpIndexer
APP2OBJS=\
$(OBJ)$/helpindexer.obj
APP2RPATH = NONE
APP2STDLIBS+=$(SALLIB) $(PKGCONFIG_LIBS)
SHL1TARGET =$(LIBBASENAME)$(DLLPOSTFIX)
SHL1LIBS= $(SLB)$/$(TARGET).lib
.IF "$(COM)" == "MSC"
......@@ -93,26 +104,7 @@ SHL1USE_EXPORTS =ordinal
DEF1NAME =$(SHL1TARGET)
DEFLIB1NAME =$(TARGET)
JAVAFILES = \
HelpIndexerTool.java \
HelpFileDocument.java
JAVACLASSFILES = \
$(CLASSDIR)$/$(PACKAGE)$/HelpIndexerTool.class \
$(CLASSDIR)$/$(PACKAGE)$/HelpFileDocument.class
.IF "$(SYSTEM_LUCENE)" == "YES"
EXTRAJARFILES += $(LUCENE_CORE_JAR) $(LUCENE_ANALYZERS_JAR)
.ELSE
JARFILES += lucene-core-2.3.jar lucene-analyzers-2.3.jar
.ENDIF
JAVAFILES = $(subst,$(CLASSDIR)$/$(PACKAGE)$/, $(subst,.class,.java $(JAVACLASSFILES)))
JARCLASSDIRS = $(PACKAGE)/*
JARTARGET = HelpIndexerTool.jar
JARCOMPRESS = TRUE
# --- Targets ------------------------------------------------------
.INCLUDE : target.mk
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment