""" Original Version by Roberto De Almeida. Hacked renamed and ported to new lupy version by Steven Armstrong. This is based on Ted Leung's lucene.py. Allow searching of the blog by using a Lupy search for a set of terms. This plugin consists of two scripts: lupy_search.py: the actual plugin lupy_index.py: a helper script to create the index, see below If you make any changes to this plugin, please send a patch to so that we can incorporate them. Thanks! To install: 1) Put lupy_search.py in your plugin directory. 2) In config.py add lupy_search to py['load_plugins'] 3) Add the following to config.py: py['lupy_index'] = '/path/to/your/index' py['lupy_file_ext'] = ['txt', 'txtl', ...] 3) Set up a cron job to run lupy_index.py daily: 0 0 * * * /path/to/lupy_index.py -q 4) Somewhere in your web page you need a search form:
The action of the form should be the top level URI of your blog. $Id: lupy_search.py,v 1.3 2006/03/22 20:52:22 sar Exp $ """ __author__ = "Roberto De Almeida , Steven Armstrong " __version__ = "$Revision: 1.3 $ $Date: 2006/03/22 20:52:22 $" __url__ = "http://www.c-area.ch/code/" __description__ = "Lupy search" # Python imports import os, urllib, sys # Lupy 0.2.1 imports http://divmod.org/Home/Projects/Lupy/ from lupy.index.term import Term from lupy.search.indexsearcher import IndexSearcher from lupy.search.term import TermQuery from lupy.search.phrase import PhraseQuery from lupy.search.boolean import BooleanQuery # Pyblosxom imports from Pyblosxom.entries import fileentry from Pyblosxom import tools from config import py as cfg _debug = False if _debug: log = tools.getLogger() indexpath = os.path.abspath(cfg['lupy_index']) _file_ext_default = ['txt'] def verify_installation(request): config = request.getConfiguration() retval = 1 if not config.has_key('lupy_index'): print 'The "lupy_index" property must be set in the config file.' retval = 0 if not config.has_key('lupy_file_ext'): print 'The optional property "lupy_file_ext" is not set.' print 'Using the default of %s.' % _file_ext_default print 'This property is used by the boundled lupy_index.py script.' return retval # Lupy util functions def termSearch(qStr): """Find all docs containing the word C{qStr}.""" t = Term('text', qStr) q = TermQuery(t) return q def phraseSearch(qStr, field='text'): """Find all docs containing the phrase C{qStr}.""" parts = qStr.split() q = PhraseQuery() for p in parts: t = Term(field, p) q.add(t) return q def boolSearch(ands=[], ors=[], nots=[]): """ Build a simple boolean query. each word in B{ands} is equiv to +word each word in B{ors} is equiv to word each word in B{nots} is equiv to -word e.g. boolSearch(['spam'], ['eggs'], ['parrot', 'cheese']) is equiv to +spam eggs -parrot -cheese in Google/Lucene syntax """ q = BooleanQuery() for a in ands: t = Term('text', a) tq = TermQuery(t) q.add(tq, True, False) for a in ors: t = Term('text', a) tq = TermQuery(t) q.add(tq, False, False) for a in nots: t = Term('text', a) tq = TermQuery(t) q.add(tq, False, True) return q def runQuery(q, searcher): """Run a query through a searcher and return the hits""" hits = searcher.search(q) return hits # /Lupy util functions def makeEntry(filename, request): """ @param filename: filename of matching entry @type filename: string @param config: a pyblosxom config dict @type config: a dict """ config = request.getConfiguration() return fileentry.FileEntry(request, filename, config['datadir']) """ Note to self: s = '"get this" Hello +World -Planet "Hello World"' phrase_re = re.compile(r'("[^"]*")') phrase = re.findall(phrase_re, s) bool = phrase_re.sub('', s).strip().split() """ def search(request, config, term): """ Search for the specified search term @param config: a pyblosxom config dict @type config: a dict @param term: the search term @type term: a string """ urllib.quote(term) term = term.lower() if _debug: log.info("term: %s"+ term) searcher = IndexSearcher(indexpath) # phraseSearch if term is like: "This is a phrase" if term.startswith("\"") and term.endswith("\""): if _debug: log.info("phraseSearch") q = phraseSearch(term[1:-1]) # boolSearch if term is like: +hello planet -world elif "+" in term or "-" in term or " " in term: if _debug: log.info("boolSearch") terms = term.split() ands, ors, nots = [], [], [] for t in terms: if t.startswith("+"): ands.append(t[1:]) elif t.startswith("-"): nots.append(t[1:]) else: ors.append(t) if _debug: log.info(ands, ors, nots) q = boolSearch(ands, ors, nots) # termSearch if term is like: gugus else: if _debug: log.info("termSearch") q = termSearch(term) files = runQuery(q, searcher) results = [] for file in files: results.append(file.get('file')) searcher.close() #results = [ os.path.join(config['datadir'], x[2:-1]) for x in results ] entries = [ makeEntry(x, request) for x in results] entries = [ ( x._mtime, x ) for x in entries ] entries.sort() entries.reverse() return [ x[1] for x in entries ] def cb_prepare(args): """ Add a nice header for the Lupy search, this header goes into the $searchHeader variable for including in the header template file. """ # do nothing if the form is not a lupy form request = args["request"] form = request.getHttp()['form'] if not form.has_key("q"): return None data = request.getData() resultnumber = len(data['lupyResults']) if resultnumber < 1: data['searchHeader'] = "
Your search returned no results for %s. Try again.
" % form["q"].value else: data['searchHeader'] = "
Your search returned %s result(s) for %s. They are below:
" % (str(resultnumber), form["q"].value) def cb_filelist(args): """ Lupy search handling @param request: the Pyblosxom request @type request: a Pyblosxom request object """ # do nothing if the form is not a lupy form request = args["request"] form = request.getHttp()['form'] if not form.has_key("q"): return None config = request.getConfiguration() data = request.getData() data['lupyResults'] = search(request, config, form["q"].value) return data['lupyResults']