Kaydet (Commit) 4ed2f0a3 authored tarafından Eray Özkural's avatar Eray Özkural

* implement search command

 - just enough improvements in pisi.search to make it work :)
* rearrange pisi TODO list
üst 8bfd9145
......@@ -74,6 +74,7 @@ Legend:
3. Beta
+ build cmd: install build deps
+ improve url support, it's a little dumb at the moment
+ devise a File class that works with URLs
+ add URL support to autoxml
......@@ -169,26 +170,21 @@ Legend:
4. Release
* high priority
- transaction stuff for database (eray)
* medium priority
/ high-level build commands
- install build dependencies
- component improvements (optional)
? if comp a has comp b, then it must have all packages in b
- maintain a list of components each component has
- maintain a list of components each component has
* low priority
- search command: (eray)
- implement an inverted index (if there's a good one, just use it)
- search in either summary, or description
- english stemming
- turkish stemming
- phonetic/syntactic fault tolerance (whichever works!)
- in particular evaluate soundex and metaphone
- research if there are language independent phonetic encodings
- implement as an option (or make it possible to turn it off if it works well :))
- fault tolerant search in package names (somehow)
+ search command: (eray)
+ implement an inverted index
+ search in summary and description
/ overhaul installdb (try to merge its use with packagedb) (eray)
/ verify methods
+ SpecFile
......@@ -203,7 +199,10 @@ Legend:
5. Post Release
- high-level build commands
- build-all (or build-distro) command
- build command enhancements:
- aware of repo sources
- full dependency analysis like install/upgrade etc.
- build-all (or build-distro) command, or as a separate tool
- an emerge command, just for fun
- more support for categories:
- put categories into a database
......@@ -213,10 +212,22 @@ Legend:
- sourceb
- improve url support: consider adding "other" protocols
- removable media: media://
- transaction stuff for database (eray)
- multi-architecture support (baris, caglar)
/ design decisions
/ extend XML specs to support that
- cross-platform building support
- incremental build/fetch for repository index (pisi-index.xml)
- diffsets (caglar)
- a command to check if repos are consistent wrt dep relations
* low priority
- search command: (eray)
- search components
- english stemming
- turkish stemming
- phonetic/syntactic fault tolerance (whichever works!)
- in particular evaluate soundex and metaphone
- research if there are language independent phonetic encodings
- implement as an option (or make it possible to turn it off if it works well :))
- fault tolerant search in package names (somehow)
......@@ -46,6 +46,7 @@ from pisi.build import build, build_until
from pisi.atomicoperations import resurrect_package
from pisi.metadata import MetaData
from pisi.files import Files
import pisi.search
class Error(pisi.Error):
    """Base exception for errors raised by the PISI API module."""
    pass
......@@ -75,6 +76,7 @@ def init(database = True, options = None, ui = None, comar = True):
ctx.componentdb = pisi.component.ComponentDB()
packagedb.init_db()
pisi.sourcedb.init()
pisi.search.init(['summary', 'description'], ['en', 'tr'])
else:
ctx.repodb = None
ctx.installdb = None
......@@ -93,6 +95,7 @@ def finalize():
ctx.componentdb.close()
packagedb.finalize_db()
pisi.sourcedb.finalize()
pisi.search.finalize()
ctx.ui.debug('PISI API finalized')
ctx.ui.close()
ctx.initialized = False
......
......@@ -928,18 +928,56 @@ class ListPending(Command):
list = ctx.installdb.list_pending()
for p in list.keys():
print p
self.finalize()
class Search(Info):
    """Search packages

Usage: search <term1> <term2> ... <termn>

Finds a package in repository containing specified search terms
"""

    # autocommand registers this class with the command-line dispatcher
    __metaclass__ = autocommand

    def __init__(self):
        super(Search, self).__init__()

    # (command name, short alias) used by the command dispatcher
    name = ("search", "s")

    def options(self):
        """Add search-specific options on top of the inherited Info options."""
        super(Search, self).options()
        self.parser.add_option("-l", "--language", action="store",
                               help=_("set search language"))

    def get_lang(self):
        """Return the search language code.

        Priority: the -l/--language option, then the locale detected by
        LocalText.get_lang(); anything outside the indexed languages
        ('en', 'tr') falls back to 'en'.
        """
        lang = ctx.get_option('language')
        if not lang:
            lang = pisi.pxml.autoxml.LocalText.get_lang()
        if lang not in ['en', 'tr']:
            lang = 'en'
        return lang

    def search(self, id, terms):
        """Query the inverted index *id* ('summary' or 'description')."""
        lang = self.get_lang()
        return pisi.search.query(id, lang, terms)

    def run(self):
        """Search both indexes and print info for every matching package."""
        self.init(True)
        if not self.args:
            self.help()
            return

        r1 = self.search('summary', self.args)
        r2 = self.search('description', self.args)
        # union of the two result sets: a hit in either field counts
        r = r1.union(r2)
        for pkg in r:
            self.printinfo_package(pkg)

        self.finalize()
class SearchFile(Command):
"""Search for a file
......@@ -961,8 +999,8 @@ Finds the installed package which contains the specified file.
self.parser.add_option("-f", "--fuzzy", action="store_true",
default=False, help=_("fuzzy search"))
# and what does exact mean? -- exa
@staticmethod #fuck python 2.3 compatibility I don't care about it
# what does exact mean? -- exa
@staticmethod
def search_exact(path):
files = []
path = path.lstrip('/') #FIXME: this shouldn't be necessary :/
......
......@@ -80,6 +80,13 @@ class PackageDB(object):
self.dr[dep_name] = [ (name, dep) ]
# add component
ctx.componentdb.add_package(package_info.partOf, package_info.name)
# index summary and description
for (lang, doc) in package_info.summary.iteritems():
if lang in ['en', 'tr']:
pisi.search.add_doc('summary', lang, package_info.name, doc)
for (lang, doc) in package_info.description.iteritems():
if lang in ['en', 'tr']:
pisi.search.add_doc('description', lang, package_info.name, doc)
def clear(self):
    """Empty the in-memory package dictionary.

    NOTE(review): only self.d is cleared; the reverse-dependency map
    self.dr (populated on the add path) is left untouched — confirm
    this is intentional.
    """
    self.d.clear()
......
......@@ -106,6 +106,7 @@ class LocalText(dict):
newnode.appendChild(newtext)
node.appendChild(newnode)
#FIXME: maybe more appropriate for pisi.util
@staticmethod
def get_lang():
try:
......
......@@ -55,4 +55,6 @@ def remove_doc(id, lang, docid, str):
ctx.invidx[id][lang].remove_doc(docid)
def query(id, lang, terms):
    """Query the inverted index *id* in language *lang* with *terms*.

    Terms are case-normalized with preprocess.lower before the lookup.
    """
    import preprocess as p
    normalized = [p.lower(lang, term) for term in terms]
    return ctx.invidx[id][lang].query(normalized)
......@@ -12,5 +12,29 @@
import tokenize
def lowly_python(str):
    """Lowercase *str*, special-casing 'I' independently of the locale.

    Python's str.lower() is locale-blind, so 'I' is mapped explicitly
    rather than trusting the process locale.
    """
    def lowly_char(c):
        # BUG FIX: the original computed `lowly` but returned `c`
        # unchanged, so no lowercasing ever happened.
        if c == 'I':
            lowly = 'i'  # because of some fools we can't choose locale in lower
        else:
            lowly = c.lower()
        return lowly
    return "".join(lowly_char(c) for c in str)
def lower(lang, str):
    """Lowercase *str* for language *lang*.

    Turkish text goes through the special-cased lowly_python; every
    other language uses plain str.lower().
    """
    return lowly_python(str) if lang == 'tr' else str.lower()
def preprocess(lang, str):
    """Tokenize *str* for language *lang* and case-normalize each token.

    Returns the list of lowered tokens ready for indexing/querying.
    """
    # The stale `return tokenize.tokenize(lang, str)` line (diff residue
    # from the pre-commit version) made the normalization below
    # unreachable; it has been removed.
    terms = tokenize.tokenize(lang, str)
    # normalize case so the inverted index matches case-insensitively
    terms = [lower(lang, term) for term in terms]
    return terms
......@@ -25,6 +25,8 @@ def tokenize(lang, str):
if len(token) > 0:
tokens.append(token)
token = unicode()
elif x in string.punctuation:
pass # eat punctuation
else:
token += x
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment