SearchKitExample1

This sample was translated from sample code at deallus.com.

This example indexes a single directory (without recursion) and allows searching for documents in that directory using a (very) simple GUI.

Sources

AppController.py

import Cocoa
import SearchKit
import objc
import os

class AppController(Cocoa.NSObject):
    """Controller for the example window.

    Lets the user pick a directory, builds an on-disk Search Kit index
    from the files directly inside that directory (no recursion), and
    runs queries against that index, reporting matches in a text view.
    """

    # Outlets connected in the nib file.
    myWindow = objc.IBOutlet()

    selectDirectoryButton = objc.IBOutlet()
    directoryTextField = objc.IBOutlet()

    numberOfDocumentsTextField = objc.IBOutlet()
    numberOfTermsTextField = objc.IBOutlet()

    buildIndexButton = objc.IBOutlet()

    searchField = objc.IBOutlet()
    searchResultsTextView = objc.IBOutlet()

    # Path of the directory whose files get indexed.
    directoryToIndex = objc.ivar()
    # The Search Kit index; stays None until buildIndex_ succeeds.
    myIndex = objc.ivar()

    def awakeFromNib(self):
        """Initialise the UI: default to the home directory and configure
        the search field to search only when return is pressed."""
        # set our default directory to index to be our home directory
        self.directoryToIndex = Cocoa.NSHomeDirectory()
        # go ahead and put that value into the UI right on wake-up
        self.directoryTextField.setStringValue_(self.directoryToIndex)

        # tell the search field cell that we only want to send
        # search requests (the associated action) on pressing return
        searchCell = self.searchField.cell()
        searchCell.setSendsWholeSearchString_(True)
        # the alternative (False) would be to get a search initiated
        # with every keystroke - which may be what is desired in a
        # prefix style search (like iTunes).

    @objc.IBAction
    def chooseDirectory_(self, sender):
        """Ask the user for a directory and remember it as the one to index.

        The chosen directory is both where the index file is placed and
        where the files to make searchable are read from.
        """
        # set up an NSOpenPanel that can select exactly one directory
        op = Cocoa.NSOpenPanel.openPanel()
        op.setCanChooseDirectories_(True)
        op.setCanChooseFiles_(False)
        op.setResolvesAliases_(True)
        op.setAllowsMultipleSelection_(False)
        # NOTE(review): runModalForDirectory:file:types: and -filename are
        # believed to be deprecated in favour of -runModal and -URL; confirm
        # against the deployment target before modernising.
        result = op.runModalForDirectory_file_types_(None, None, None)
        if result == Cocoa.NSOKButton:
            self.directoryToIndex = op.filename()
            self.directoryTextField.setStringValue_(self.directoryToIndex)

    @objc.IBAction
    def buildIndex_(self, sender):
        """(Re)create the on-disk index and add every entry of the chosen
        directory to it, then flush it and refresh the displayed counts."""
        # we will need an NSFileManager object to do various checking of files.
        fm = Cocoa.NSFileManager.defaultManager()

        # you don't have to provide the index with a name, but we will...
        indexName = "My Arbitrary Index Name"
        # when you build an index on disk, you need to give it a file:// URL
        # So we pick the directory that's been chosen and append "/myindex"
        # onto it
        indexFileName = "myindex"
        indexFile = os.path.join(self.directoryToIndex, indexFileName)
        # and build an NSURL from that
        fileUrlToIndex = Cocoa.NSURL.fileURLWithPath_(indexFile)

        # When indexing documents, there's a number of index specific
        # considerations that can be applied with search kit. To set any of
        # them manually, we put them in a dictionary: here the language to
        # expect and the minimum term length. A plain Python dict is handed
        # straight to the Search Kit call below.
        analysisDict = {
            SearchKit.kSKLanguageTypes: "en",
            SearchKit.kSKMinTermLength: 2,
        }

        if fm.fileExistsAtPath_(indexFile):
            # our index file already exists... if we try to create one anyway,
            # the function will silently fail. So let's just be sure and
            # delete it. in a proper function, we would recognize it already
            # exists and attempt to open it for editing rather than recreate
            # the whole thing.
            if not fm.removeFileAtPath_handler_(indexFile, None):
                Cocoa.NSLog("Could not remove existing index file %@", indexFile)

        # the function to create the on-disk index
        self.myIndex = SearchKit.SKIndexCreateWithURL(
            # the file:// URL of where to place the index file
            fileUrlToIndex,
            # a name for the index (this may be None)
            indexName,
            # the type of index
            SearchKit.kSKIndexInverted,
            # and our index attributes dictionary
            analysisDict,
        )
        # note that the above function call will silently fail if you give it a
        # directory and not a file... or if the index file already exists

        if self.myIndex is None:
            # we shouldn't get here...
            Cocoa.NSLog("TROUBLE: index is None")
            return

        # display information about our newly created and completely blank index
        self.displayIndexInformation()

        # before we get into the meat of reading in these files, we need to tell
        # the Search Kit to load the default extractor plugins so that the
        # SearchKit can find the bits it needs to from the files. In
        # MacOS 10.3, the only plugins supported are the default plugins,
        # which include processing for plaintext, PDF, HTML, RTF, and
        # Microsoft Word documents.
        SearchKit.SKLoadDefaultExtractorPlugIns()

        # we're just going to get a "short list". You could alternately use the
        # NSDirectoryEnumerator class to recursively descend through all the
        # files under a directory, and you would probably want to do it in
        # some other thread, updating a progress bar or such to indicate
        # something was happening. It can take a while (for example) to
        # process every file in your home directory and beneath.
        listOfFiles = fm.directoryContentsAtPath_(self.directoryToIndex)
        if listOfFiles is None:
            # the listing failed (e.g. unreadable directory); index nothing
            # rather than crash while iterating over None
            Cocoa.NSLog(
                "Could not list the contents of %@", self.directoryToIndex)
            listOfFiles = []

        for aFile in listOfFiles:
            # the index file lives in the directory being indexed; don't
            # feed the index into itself
            if aFile == indexFileName:
                continue

            # the helper wants an NSURL object, so create one from the
            # full path of the entry and pass it over.
            fileURL = Cocoa.NSURL.fileURLWithPath_(
                os.path.join(self.directoryToIndex, aFile))

            # invoke a helper method to add this file into our index
            self.addDocumentToIndex_(fileURL)

        # once all the files have been added to the index, it's very important
        # to flush the data down to disk. Without this step, the data resides
        # in memory but isn't useful for searching and won't respond with the
        # correct information about the index (number of documents, number of
        # terms, etc).
        if SearchKit.SKIndexFlush(self.myIndex):
            Cocoa.NSLog("flushed index to disk")
        else:
            Cocoa.NSLog("A problem was encountered flushing the index to disk.")

        # update the index information
        self.displayIndexInformation()

    @objc.IBAction
    def search_(self, sender):
        """Run the sender's string value as a query against the index and
        show up to 10 scored matches in the results text view."""
        # do some sanity checking to make sure we're not going to run into some
        # memory exception because we never initialized our SearchKit index
        # properly
        if self.myIndex is None:
            msg = "No index has been created to against which to search."
            Cocoa.NSLog(msg)
            self.searchResultsTextView.setString_(msg)
            return

        searchQuery = sender.stringValue()

        # Since this is just an example, I am going to be lazy and just report
        # the results of our search into a text view. The report is collected
        # line by line and joined at the end.
        reportLines = ["Searching for:" + searchQuery]

        # request a result set from the index with our search terms.
        # SKSearchCreate takes search *options*, so pass the default option
        # constant rather than a constant from the older search-type API.
        searchResults = SearchKit.SKSearchCreate(
            self.myIndex, searchQuery, SearchKit.kSKSearchOptionDefault)

        if searchResults is None:
            msg = "Search function failed"
            Cocoa.NSLog(msg)
            self.searchResultsTextView.setString_(msg)
            return

        # now fetch the matches: document IDs and their scores come back
        # as parallel sequences, along with the number of matches found
        busy, outDocumentIDsArray, scoresArray, resultCount = (
            SearchKit.SKSearchFindMatches(
                searchResults,  # the search result set
                10,             # maximum number of matches to fetch
                None,           # an array of SKDocumentID
                None,           # an array of scores
                1.0,            # max 1 sec
                None,           # outFoundcount
            )
        )
        if busy:
            reportLines.append(
                "%d Results Found (still busy)" % (resultCount,))
        else:
            reportLines.append("%d Results Found" % (resultCount,))

        # pair every score with its document ID; only the first resultCount
        # entries are reported so a length mismatch cannot leak stale slots
        matches = list(zip(scoresArray, outDocumentIDsArray))[:resultCount]

        # iterate over the results and tell the NSTextView what we found
        for score, hitID in matches:
            hit = SearchKit.SKIndexCopyDocumentForDocumentID(
                self.myIndex, hitID)
            if hit is None:
                continue

            documentName = SearchKit.SKDocumentGetName(hit)
            reportLines.append("Score: %f ==> %s" % (score, documentName))

        # every line of the report, including the last, ends with a newline
        self.searchResultsTextView.setString_("\n".join(reportLines) + "\n")

    def addDocumentToIndex_(self, fileURL):
        """Create a Search Kit document for ``fileURL`` (a file:// NSURL)
        and add it to the index, logging any failure."""
        # do some sanity checking to make sure we're not going to run into some
        # memory exception because we never initialized our SearchKit index
        # properly
        if self.myIndex is None:
            Cocoa.NSLog("myIndex is None - not processing %@",
                        fileURL)
            return

        Cocoa.NSLog("Processing %@", fileURL.absoluteString())
        # create the Search Kit document object
        # note that this method only accepts file:// style URL's
        aDocument = SearchKit.SKDocumentCreateWithURL(fileURL)
        if aDocument is None:
            # nothing to add if the document object could not be created
            Cocoa.NSLog("There was a problem adding %@", fileURL)
            return

        # if you wanted to watch them process in, just uncomment the following
        # 2 lines.
        # Cocoa.NSLog("Name: %@", SearchKit.SKDocumentGetName(aDocument))
        # Cocoa.NSLog("Scheme: %@", SearchKit.SKDocumentGetSchemeName(aDocument))

        # add the document to the index
        added = SearchKit.SKIndexAddDocument(
            self.myIndex,  # a reference to the index added to
            aDocument,     # the document we want to add
            None,          # this could be a mime type hint
            True,          # the document can be overwritten
        )
        if not added:
            Cocoa.NSLog("There was a problem adding %@", fileURL)

    def displayIndexInformation(self):
        """Show the index's document count and maximum term ID in the UI,
        or 'N/A' in both fields when there is no index."""
        if self.myIndex is None:
            self.numberOfDocumentsTextField.setStringValue_('N/A')
            self.numberOfTermsTextField.setStringValue_('N/A')
            return

        # get the number of documents in the index
        numberOfDocuments = SearchKit.SKIndexGetDocumentCount(self.myIndex)
        # place it into the text field
        self.numberOfDocumentsTextField.setIntValue_(numberOfDocuments)

        # get the number of terms in the index
        maxTerm = SearchKit.SKIndexGetMaximumTermID(self.myIndex)
        # place it into the text field
        self.numberOfTermsTextField.setIntValue_(maxTerm)

main.py

import objc
from PyObjCTools import AppHelper

# Imported only for its side effect of defining the AppController class;
# the module itself is never referenced by name in this script.
import AppController  # noqa: F401

# Verbose mode is switched on here, which helps during debugging.
# Comment out the next line to make PyObjC less chatty.
objc.setVerbose(True)


# Hand control to the application's event loop.
AppHelper.runEventLoop()

setup.py

"""
Script for building the example.

Usage:
    python3 setup.py py2app
"""
from setuptools import setup

# Packages that must be available before the build itself can run.
BUILD_REQUIREMENTS = [
    "py2app",
    "pyobjc-framework-Cocoa",
    "pyobjc-framework-SearchKit",
]

# Describe the application bundle: its entry script and the localized
# resources directory that is shipped alongside it.
setup(
    name="PySearchKitExample1",
    app=["main.py"],
    data_files=["English.lproj"],
    setup_requires=BUILD_REQUIREMENTS,
)

Resources