Commit 9038e0bd, authored by Eray Özkural

* fix: make inverted index stuff aware of multiple repositories. ouch.

parent 507c285c
......@@ -12,9 +12,9 @@
# PiSi version
__version__ = "1.1_beta7"
__version__ = "1.1_beta8"
__dbversion__ = "1.1_beta7"
__dbversion__ = "1.1_beta8"
__filesdbversion__ = "1.0.5" # yes, this is the real bottleneck
__all__ = [ 'api', 'config', 'packagedb', 'installdb', 'search' ]
......
......@@ -288,22 +288,22 @@ def search_package_names(query):
r.add(pkgname)
return r
def search_package_terms(terms, lang = None, search_names = True, repo = pisi.itembyrepodb.all):
    """Search packages by a list of already-split terms.

    Queries the 'summary' and 'description' inverted indexes for `terms`
    in language `lang`, restricted to `repo`, and returns the union of
    both result sets.

    terms        -- iterable of search terms
    lang         -- language code; when empty, the language reported by
                    LocalText.get_lang() is used
    search_names -- when true, packages whose names match any single term
                    are added to the result
    repo         -- repository selector forwarded to the index queries
    """
    if not lang:
        lang = pisi.pxml.autoxml.LocalText.get_lang()
    r1 = pisi.search.query_terms('summary', lang, terms, repo = repo)
    r2 = pisi.search.query_terms('description', lang, terms, repo = repo)
    r = r1.union(r2)
    if search_names:
        # NOTE(review): the name search is not given `repo`; confirm that
        # search_package_names is meant to look at every repository.
        for term in terms:
            r |= search_package_names(term)
    return r
# search_package: query the summary-side and description indexes for the
# free-text `query` in language `lang` (defaulting to LocalText.get_lang()),
# union both result sets and, when search_names is set, add name matches.
# The remainder of the function lies outside this view.
# NOTE(review): the replacement line below queries index id 'sumdesc', while
# the indexing code visible in this change only adds 'summary' and
# 'description' documents and search_package_terms queries 'summary'.
# This looks like a typo for 'summary' -- confirm before relying on it.
def search_package(query, lang = None, search_names = True):
def search_package(query, lang = None, search_names = True, repo = pisi.itembyrepodb.all):
if not lang:
lang = pisi.pxml.autoxml.LocalText.get_lang()
r1 = pisi.search.query('summary', lang, query)
r2 = pisi.search.query('description', lang, query)
r1 = pisi.search.query('sumdesc', lang, query, repo = repo)
r2 = pisi.search.query('description', lang, query, repo = repo)
r = r1.union(r2)
if search_names:
r |= search_package_names(query)
......
......@@ -9,15 +9,15 @@
#
# Please read the COPYING file.
#
# package database
# interface for update/query to local package repository
# Authors: Eray Ozkural <eray at pardus.org.tr>
# Baris Metin <baris at pardus.org.tr>
# we basically store everything in PackageInfo class
# yes, we are cheap
"""package database
interface for update/query to local package repository
we basically store everything in PackageInfo class
yes, we are cheap
"""
import os
import fcntl
......@@ -108,10 +108,10 @@ class PackageDB(object):
# index summary and description
for (lang, doc) in package_info.summary.iteritems():
if lang in ['en', 'tr']:
pisi.search.add_doc('summary', lang, package_info.name, doc, txn)
pisi.search.add_doc('summary', lang, package_info.name, doc, repo=repo, txn=txn)
for (lang, doc) in package_info.description.iteritems():
if lang in ['en', 'tr']:
pisi.search.add_doc('description', lang, package_info.name, doc, txn)
pisi.search.add_doc('description', lang, package_info.name, doc, repo=repo, txn=txn)
ctx.txn_proc(proc, txn)
......@@ -133,6 +133,12 @@ class PackageDB(object):
if self.dr.has_key(name, repo, txn=txn):
self.dr.remove_item(name, repo, txn=txn)
ctx.componentdb.remove_package(package_info.partOf, package_info.name, repo, txn)
for (lang, doc) in package_info.summary.iteritems():
if lang in ['en', 'tr']:
pisi.search.remove_doc('summary', lang, package_info.name, doc, repo=repo, txn=txn)
for (lang, doc) in package_info.description.iteritems():
if lang in ['en', 'tr']:
pisi.search.remove_doc('description', lang, package_info.name, doc, repo=repo, txn=txn)
self.d.txn_proc(proc, txn)
def remove_repo(self, repo, txn = None):
......
......@@ -47,17 +47,17 @@ def finalize():
ctx.invidx[id][lang].close()
ctx.invidx = {}
def add_doc(id, lang, docid, str, repo = None, txn = None):
    """Index a document in the inverted index selected by `id` and `lang`.

    `str` is preprocessed for `lang` into terms, and `docid` is recorded
    under each of those terms for repository `repo`.

    id    -- index identifier (key into ctx.invidx)
    lang  -- language code (second-level key into ctx.invidx)
    docid -- identifier of the document being indexed
    str   -- raw text of the document
    repo  -- repository the document belongs to
    txn   -- optional database transaction to run in
    """
    terms = p.preprocess(lang, str)
    ctx.invidx[id][lang].add_doc(docid, terms, repo = repo, txn = txn)
def remove_doc(id, lang, docid, str, repo = None, txn = None):
    """Remove a document from the inverted index selected by `id` and `lang`.

    `str` must be the text the document was indexed with: it is
    preprocessed into terms exactly as add_doc does, and `docid` is removed
    from each of those terms for repository `repo`.

    id    -- index identifier (key into ctx.invidx)
    lang  -- language code (second-level key into ctx.invidx)
    docid -- identifier of the document being removed
    str   -- raw text of the document
    repo  -- repository the document belongs to
    txn   -- optional database transaction to run in
    """
    # InvertedIndex.remove_doc takes (doc, terms, ...); the terms have to be
    # derived here, otherwise the call fails for lack of the `terms` argument.
    terms = p.preprocess(lang, str)
    ctx.invidx[id][lang].remove_doc(docid, terms, repo = repo, txn = txn)
def query_terms(id, lang, terms, repo = None, txn = None):
    """Query the `id`/`lang` inverted index with a list of terms.

    Each term is lower-cased for `lang` before the lookup; the index's
    query result (a set of document ids) is returned unchanged.

    id    -- index identifier (key into ctx.invidx)
    lang  -- language code (second-level key into ctx.invidx)
    terms -- iterable of search terms
    repo  -- repository selector forwarded to the index
    txn   -- optional database transaction to run in
    """
    # build a real list so the index can iterate the terms more than once
    terms = [p.lower(lang, x) for x in terms]
    return ctx.invidx[id][lang].query(terms, repo = repo, txn = txn)
def query(id, lang, str, repo = None, txn = None):
    """Query the `id`/`lang` inverted index with free text.

    `str` is preprocessed for `lang` into terms, which are then handed to
    query_terms together with `repo` and `txn`.

    Returns whatever query_terms returns for those terms.
    """
    terms = p.preprocess(lang, str)
    return query_terms(id, lang, terms, repo = repo, txn = txn)
......@@ -14,59 +14,70 @@
import types
import pisi.lockeddbshelve as shelve
from pisi.itembyrepodb import ItemByRepoDB
import pisi.itembyrepodb as itembyrepodb
class InvertedIndex(object):
    """a database of term -> set of documents, stored per repository"""

    def __init__(self, id, lang):
        """open the index database named after the index id and language"""
        self.d = ItemByRepoDB('ii-%s-%s' % (id, lang))

    def close(self):
        """close the underlying database"""
        self.d.close()

    def has_term(self, term, repo = None, txn = None):
        """tell whether term has an entry for the given repo"""
        return self.d.has_key(shelve.LockedDBShelf.encodekey(term), repo=repo, txn=txn)

    def get_term(self, term, repo = None, txn = None):
        """get set of doc ids given term; an empty set if term is unknown"""
        term = shelve.LockedDBShelf.encodekey(term)
        def proc(txn):
            if not self.has_term(term, repo=repo, txn=txn):
                return set()
            return self.d.get_item(term, repo=repo, txn=txn)
        return self.d.txn_proc(proc, txn)

    def get_union_term(self, name, txn = None, repo = itembyrepodb.repos):
        """get the union of the doc ids stored for a term in every
        repository yielded by self.d.order(repo = repo), not just the
        first repo in order. returns an empty set for an unknown term"""
        name = shelve.LockedDBShelf.encodekey(name)
        def proc(txn):
            docs = set()
            # NOTE(review): has_key is called without txn while get passes
            # it; confirm whether the lookup should join the transaction.
            if self.d.d.has_key(name):
                s = self.d.d.get(name, txn=txn)
                for repostr in self.d.order(repo = repo):
                    if s.has_key(repostr):
                        docs |= s[repostr]
            return docs
        return self.d.txn_proc(proc, txn)

    def query(self, terms, repo = None, txn = None):
        """get the set of doc ids that contain every one of terms;
        an empty set when terms is empty"""
        # NOTE(review): repo defaults to None here and is always forwarded,
        # so get_union_term's own default is never used; confirm that
        # self.d.order accepts None.
        def proc(txn):
            docs = [ self.get_union_term(x, repo=repo, txn=txn) for x in terms ]
            if docs:
                return reduce(lambda x,y: x.intersection(y), docs)
            else:
                return set()
        return self.d.txn_proc(proc, txn)

    def list_terms(self, repo = None, txn= None):
        """list the terms stored for repo"""
        # the previous call passed an undefined name `f` as first argument
        # NOTE(review): assumes ItemByRepoDB.list accepts repo/txn keywords,
        # as the original call did; confirm against its definition.
        return self.d.list(repo=repo, txn=txn)

    def add_doc(self, doc, terms, repo = None, txn = None):
        """record doc under each of terms for repo"""
        def f(txn):
            for term_i in terms:
                term_i = shelve.LockedDBShelf.encodekey(term_i)
                term_i_docs = self.get_term(term_i, repo=repo, txn=txn)
                term_i_docs.add(doc)
                self.d.add_item(term_i, term_i_docs, repo=repo, txn=txn) # update
        return self.d.txn_proc(f, txn)

    def remove_doc(self, doc, terms,repo=None, txn=None):
        """drop doc from each of terms for repo; terms under which doc
        was never recorded are tolerated"""
        def f(txn):
            for term_i in terms:
                term_i = shelve.LockedDBShelf.encodekey(term_i)
                term_i_docs = self.get_term(term_i,repo=repo, txn=txn)
                # discard, not remove: get_term yields an empty set for an
                # unknown term and set.remove would raise KeyError on it
                term_i_docs.discard(doc)
                self.d.add_item(term_i, term_i_docs, repo=repo, txn=txn) # update
        return self.d.txn_proc(f, txn)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment