· 5 min read

Implementing Search with Lucene in .Net

Here is how you can implement search on any website.

Here is how you can implement search on any website.

Here is how you can implement search on any website.

A post on this can be found here

Add the required using directives:

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Version = Lucene.Net.Util.Version;

The Lucene service class

/// <summary>
/// Static facade over a single on-disk Lucene.Net index of <c>SearchData</c> records.
/// <see cref="Init"/> must be called once before any other member is used.
/// </summary>
public static class LuceneService
{
    /// <summary>Maximum number of hits requested from Lucene per search, and the cap used by <see cref="GetAllIndexRecords"/>.</summary>
    private const int MaxHits = 1000;

    /// <summary>Lucene compatibility version shared by every analyzer and query parser in this class.</summary>
    private static readonly Version LuceneVersion = Version.LUCENE_30;

    /// <summary>Fields consulted when a search does not name a single field.</summary>
    private static readonly string[] SearchableFields = { "Id", "Heading", "Body", "GeneralSynopsis", "Tags", "IsLive" };

    private static string _luceneDir;
    private static FSDirectory _directoryCache;

    /// <summary>
    /// Lazily opened handle on the index folder.
    /// </summary>
    /// <remarks>
    /// NOTE(review): every access force-releases the index write lock and deletes a leftover
    /// <c>write.lock</c> file. That recovers from a stale lock, but it also removes a lock held by a
    /// live writer, so this class presumably expects a single writer at a time. Confirm before
    /// using it from concurrent requests.
    /// </remarks>
    private static FSDirectory IndexDirectory
    {
        get
        {
            var indexPath = RequireIndexPath();

            if (_directoryCache == null)
            {
                _directoryCache = FSDirectory.Open(new DirectoryInfo(indexPath));
            }

            if (IndexWriter.IsLocked(_directoryCache))
            {
                IndexWriter.Unlock(_directoryCache);
            }

            var lockFilePath = Path.Combine(indexPath, "write.lock");
            if (File.Exists(lockFilePath))
            {
                File.Delete(lockFilePath);
            }

            return _directoryCache;
        }
    }

    /// <summary>
    /// Sets the index location to the <c>_index</c> sub-folder of <paramref name="luceneDir"/>.
    /// Only the first call has an effect; later calls leave the configured location untouched.
    /// </summary>
    /// <param name="luceneDir">Folder that will contain the <c>_index</c> sub-folder.</param>
    public static void Init(string luceneDir)
    {
        var indexPath = Path.Combine(luceneDir, "_index");

        if (string.IsNullOrEmpty(_luceneDir))
        {
            _luceneDir = indexPath;
        }
    }

    /// <summary>Returns the configured index path, or fails with a clear message when <see cref="Init"/> was never called.</summary>
    /// <exception cref="InvalidOperationException">The index location has not been configured.</exception>
    private static string RequireIndexPath()
    {
        if (string.IsNullOrEmpty(_luceneDir))
        {
            throw new InvalidOperationException("LuceneService.Init must be called before the index is used.");
        }

        return _luceneDir;
    }

    /// <summary>
    /// Opens an index writer (creating the index when it does not exist yet), runs
    /// <paramref name="work"/> against it, and disposes the writer and then the analyzer even
    /// when <paramref name="work"/> throws.
    /// </summary>
    private static void WithWriter(Action<IndexWriter> work)
    {
        using (var analyzer = new StandardAnalyzer(LuceneVersion))
        using (var writer = new IndexWriter(IndexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            work(writer);
        }
    }

    /// <summary>Replaces the index entry whose <c>Id</c> matches <paramref name="item"/> with a fresh document.</summary>
    private static void AddToLuceneIndex(SearchData item, IndexWriter writer)
    {
        var id = item.Id.ToString();

        // remove older index entry
        writer.DeleteDocuments(new TermQuery(new Term("Id", id)));

        // Text values are coalesced to "" so that a record with a missing value can still be
        // indexed instead of failing on a null field value.
        var doc = new Document();
        doc.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("Heading", item.Heading ?? "", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Body", item.Body ?? "", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("GeneralSynopsis", item.GeneralSynopsis ?? "", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("Tags", item.Tags ?? "", Field.Store.YES, Field.Index.ANALYZED));
        // Search filters on the lower-case term "IsLive:true"; presumably the analyzer lower-cases
        // the stored "True"/"False" text, so keep this field ANALYZED.
        doc.Add(new Field("IsLive", item.IsLive.ToString(), Field.Store.YES, Field.Index.ANALYZED));

        // add entry to index
        writer.AddDocument(doc);
    }

    /// <summary>Adds every record to the index, replacing any existing entry with the same <c>Id</c>.</summary>
    /// <param name="SearchDataList">Records to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="SearchDataList"/> is null.</exception>
    public static void AddUpdateLuceneIndex(IEnumerable<SearchData> SearchDataList)
    {
        if (SearchDataList == null)
        {
            throw new ArgumentNullException(nameof(SearchDataList));
        }

        WithWriter(writer =>
        {
            foreach (var item in SearchDataList)
            {
                AddToLuceneIndex(item, writer);
            }
        });
    }

    /// <summary>Adds a single record to the index, replacing any existing entry with the same <c>Id</c>.</summary>
    /// <param name="SearchData">Record to index.</param>
    public static void AddUpdateLuceneIndex(SearchData SearchData)
    {
        AddUpdateLuceneIndex(new List<SearchData> { SearchData });
    }

    /// <summary>Removes the entry whose <c>Id</c> field equals <paramref name="record_id"/>.</summary>
    /// <param name="record_id">Identifier of the record to remove.</param>
    public static void ClearLuceneIndexRecord(int record_id)
    {
        var byId = new TermQuery(new Term("Id", record_id.ToString()));
        WithWriter(writer => writer.DeleteDocuments(byId));
    }

    /// <summary>Removes every entry from the index.</summary>
    /// <returns><c>true</c> when the index was cleared; <c>false</c> when any exception occurred.</returns>
    public static bool ClearLuceneIndex()
    {
        try
        {
            using (var analyzer = new StandardAnalyzer(LuceneVersion))
            using (var writer = new IndexWriter(IndexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                // remove older index entries
                writer.DeleteAll();
            }
        }
        catch (Exception)
        {
            // Failure is reported through the return value by design.
            return false;
        }

        return true;
    }

    /// <summary>Asks Lucene to optimize the index.</summary>
    public static void Optimize()
    {
        WithWriter(writer => writer.Optimize());
    }

    /// <summary>Rebuilds a <c>SearchData</c> record from the stored fields of a Lucene document.</summary>
    private static SearchData MapLuceneDocumentToData(Document doc)
    {
        return new SearchData
        {
            Id = Convert.ToInt32(doc.Get("Id")),
            Heading = doc.Get("Heading"),
            Body = doc.Get("Body"),
            GeneralSynopsis = doc.Get("GeneralSynopsis"),
            Tags = doc.Get("Tags"),
            IsLive = Boolean.Parse(doc.Get("IsLive")),
        };
    }

    /// <summary>
    /// Maps one page of documents to records: skips <paramref name="skipRecords"/> documents and
    /// maps at most <paramref name="takeRecords"/> of the remainder. A negative skip is treated as zero.
    /// </summary>
    private static IEnumerable<SearchData> MapLuceneToDataList(IEnumerable<Document> hits, int skipRecords, int takeRecords)
    {
        return hits
            .Skip(skipRecords)
            .Take(takeRecords)
            .Select(doc => MapLuceneDocumentToData(doc))
            .ToList();
    }

    /// <summary>
    /// Maps one page of search hits to records, loading only the documents on that page.
    /// The result is materialized before returning, so it stays valid after
    /// <paramref name="searcher"/> is disposed.
    /// </summary>
    private static IEnumerable<SearchData> MapLuceneToDataList(IEnumerable<ScoreDoc> hits, IndexSearcher searcher, int skipRecords, int takeRecords)
    {
        return hits
            .Skip(skipRecords)
            .Take(takeRecords)
            .Select(hit => MapLuceneDocumentToData(searcher.Doc(hit.Doc)))
            .ToList();
    }

    /// <summary>
    /// Parses <paramref name="searchQuery"/> as query syntax; when that fails, retries with all
    /// query-syntax characters escaped so the text is searched literally.
    /// </summary>
    private static Query ParseQuery(string searchQuery, QueryParser parser)
    {
        var trimmed = searchQuery.Trim();
        try
        {
            return parser.Parse(trimmed);
        }
        catch (ParseException)
        {
            return parser.Parse(QueryParser.Escape(trimmed));
        }
    }

    /// <summary>
    /// Runs <paramref name="searchQuery"/> against one field (when <paramref name="searchField"/> is
    /// given) or against all searchable fields ordered by relevance, and returns the requested page.
    /// At most <see cref="MaxHits"/> hits are considered.
    /// </summary>
    private static IEnumerable<SearchData> LuceneSearch(string searchQuery, int skipRecords, int takeRecords, string searchField = "")
    {
        // A query that is empty, or made up only of wildcards and whitespace, has nothing to match.
        if (string.IsNullOrEmpty(searchQuery)
            || string.IsNullOrWhiteSpace(searchQuery.Replace("*", "").Replace("?", "")))
        {
            return new List<SearchData>();
        }

        using (var analyzer = new StandardAnalyzer(LuceneVersion))
        using (var searcher = new IndexSearcher(IndexDirectory, false))
        {
            ScoreDoc[] hits;
            if (!string.IsNullOrEmpty(searchField))
            {
                // search by single field
                var parser = new QueryParser(LuceneVersion, searchField, analyzer);
                hits = searcher.Search(ParseQuery(searchQuery, parser), MaxHits).ScoreDocs;
            }
            else
            {
                // search by multiple fields (ordered by RELEVANCE)
                var parser = new MultiFieldQueryParser(LuceneVersion, SearchableFields, analyzer);
                hits = searcher.Search(ParseQuery(searchQuery, parser), null, MaxHits, Sort.RELEVANCE).ScoreDocs;
            }

            return MapLuceneToDataList(hits, searcher, skipRecords, takeRecords);
        }
    }

    /// <summary>
    /// Prefix search: every whitespace- or hyphen-separated word of <paramref name="input"/> gets a
    /// trailing <c>*</c>, and the result is AND-ed with the optional filters.
    /// </summary>
    /// <param name="input">User-entered text; may be null or empty when <paramref name="tagsString"/> is supplied.</param>
    /// <param name="skipRecords">Number of leading hits to skip.</param>
    /// <param name="takeRecords">Maximum number of records to return.</param>
    /// <param name="fieldName">Single field to search; empty searches all fields.</param>
    /// <param name="isLive">When true, adds the filter <c>IsLive:true</c>.</param>
    /// <param name="tagsString">Optional raw query fragment; wrapped in parentheses and AND-ed in.</param>
    /// <param name="depotString">Optional raw query fragment; wrapped in parentheses and AND-ed in.</param>
    /// <returns>The requested page of matches; empty when both <paramref name="input"/> and <paramref name="tagsString"/> are null or empty.</returns>
    public static IEnumerable<SearchData> Search(string input, int skipRecords, int takeRecords, string fieldName = "", bool isLive = true, string tagsString = null, string depotString = null)
    {
        if (string.IsNullOrEmpty(input) && string.IsNullOrEmpty(tagsString))
        {
            return new List<SearchData>();
        }

        // input may legitimately be null here (tags-only search), so coalesce before splitting.
        var terms = (input ?? "").Trim().Replace("-", " ").Split(' ')
            .Where(x => !string.IsNullOrEmpty(x)).Select(x => x.Trim() + "*");
        input = string.Join(" ", terms);

        var isLiveString = isLive ? "IsLive:true" : "";
        var tagsStringQuery = !string.IsNullOrEmpty(tagsString) ? "(" + tagsString + ")" : "";
        var depotStringQuery = !string.IsNullOrEmpty(depotString) ? "(" + depotString + ")" : "";

        var queryItems = new List<string> { input, isLiveString, tagsStringQuery, depotStringQuery };
        var query = string.Join(" AND ", queryItems.Where(x => !string.IsNullOrEmpty(x)));

        return LuceneSearch(query, skipRecords, takeRecords, fieldName);
    }

    /// <summary>Runs <paramref name="input"/> as-is (no prefix expansion or extra filters).</summary>
    /// <returns>The requested page of matches; empty when <paramref name="input"/> is null or empty.</returns>
    public static IEnumerable<SearchData> SearchDefault(string input, int skipRecords, int takeRecords, string fieldName = "")
    {
        return string.IsNullOrEmpty(input) ? new List<SearchData>() : LuceneSearch(input, skipRecords, takeRecords, fieldName);
    }

    /// <summary>Returns up to <see cref="MaxHits"/> records stored in the index.</summary>
    /// <returns>The stored records; empty when the index folder is missing or contains no files.</returns>
    public static IEnumerable<SearchData> GetAllIndexRecords()
    {
        // validate search index (System.IO.Directory is spelled out because Lucene.Net.Store also declares Directory)
        var indexPath = RequireIndexPath();
        if (!System.IO.Directory.Exists(indexPath) || !System.IO.Directory.EnumerateFiles(indexPath).Any())
        {
            return new List<SearchData>();
        }

        var docs = new List<Document>();
        using (var searcher = new IndexSearcher(IndexDirectory, false))
        using (var reader = IndexReader.Open(IndexDirectory, false))
        {
            var termDocs = reader.TermDocs();

            // Stop reading once the cap is reached; anything beyond it would be discarded anyway.
            while (docs.Count < MaxHits && termDocs.Next())
            {
                docs.Add(searcher.Doc(termDocs.Doc));
            }
        }

        return MapLuceneToDataList(docs, 0, MaxHits);
    }

}

You can place the generated index files wherever you like, but here is one way to keep them within your project.

In Startup.cs

// Content root of the application, captured in the constructor for later use.
private string contentRoot = "";

/// <summary>Stores the configuration and remembers the host's content root path.</summary>
public Startup(IConfiguration configuration, IWebHostEnvironment env)
{
  this.contentRoot = env.ContentRootPath;
  this.Configuration = configuration;
}

/// <summary>Creates the Lucene index in the "SearchIndex" folder under the content root.</summary>
public void Configure(IApplicationBuilder app, IWebHostEnvironment env, ILoggerFactory loggerFactory, ISearchService searchService)
{
  // ... remaining pipeline configuration omitted ...

  searchService.CreateLuceneIndex(Path.Combine(contentRoot, "SearchIndex"));
}

The SearchService class would contain something like:

/// <summary>
/// Points LuceneService at <paramref name="path"/>, indexes every post returned by the post
/// service, and then optimizes the index.
/// </summary>
/// <param name="path">Folder under which the index is kept.</param>
// Public (not private) because Startup.Configure calls it on the search service instance.
public void CreateLuceneIndex(string path)
{
  LuceneService.Init(path);

  // returns List<SearchData> in format expected
  var searchDataList = _postService.GetAllPostsForSearchIndex();
  LuceneService.AddUpdateLuceneIndex(searchDataList);

  LuceneService.Optimize();
}

Remember that editing or deleting a post also requires an update to the search index:

// After a post is edited: re-index it (any existing entry with the same Id is replaced).
LuceneService.AddUpdateLuceneIndex(newlyUpdatedSearchData);

// After a post is deleted: remove its entry from the index.
LuceneService.ClearLuceneIndexRecord(postId);
Back to Blog

Related Posts

View All Posts »
Paging In .Net Core with C# and Linq

Paging In .Net Core with C# and Linq

Almost every medium to large site requires some sort of paging through lists of information. The advantage of paging is that you only need to bring back a limited result set.

How to Connect to a DigitalOcean Managed Database When Your IP Address Changes

How to Connect to a DigitalOcean Managed Database When Your IP Address Changes

If you're working with a DigitalOcean managed database from your local machine, you've probably run into this frustrating scenario: everything works perfectly one day, then suddenly your database connection times out. The culprit? Your IP address changed, and it's no longer in the database's trusted sources list.