Friday, September 20, 2013

In the previous post, I described how to index and store text with Lucene in order to build a source index server. I also mentioned a caveat: unlike the Java version, which may have a method to add files recursively from a directory, the Lucene.Net library does not come with one. So you walk the directory yourself and build the index this way:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;

namespace SourceSearch
{
    /// <summary>
    /// Console tool that indexes every C# file under a fixed source folder with
    /// Lucene.Net and then prints the files that contain the term given on the
    /// command line.
    /// </summary>
    class Program
    {
        // Root folder that is scanned recursively for *.cs files.
        private const string SourceRoot = @"C:\code\text";

        // Sub-folder of the user's local application data folder that holds the index.
        private const string IndexFolderName = "SourceSearch";

        // Field holding the tokenized file text (indexed, not stored).
        private const string ContentsField = "contents";

        // Field holding the full path of the file (indexed and stored).
        private const string TitleField = "title";

        /// <summary>
        /// Rebuilds the index from <see cref="SourceRoot"/> and lists the indexed
        /// files whose contents contain the search term.
        /// </summary>
        /// <param name="args">Exactly one element: the term to search for.</param>
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Usage: SourceSearch <term>");
                return;
            }

            // GetFolderPath resolves the real folder; calling ToString() on the enum
            // member would only yield the literal text "LocalApplicationData".
            var indexFolder = new DirectoryInfo(Path.Combine(
                Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
                IndexFolderName));
            indexFolder.Create(); // no-op when the folder already exists

            using (var indexAt = SimpleFSDirectory.Open(indexFolder))
            {
                // create = true: start from an empty index on every run so files are
                // not added a second time (which would produce duplicate matches).
                using (var indexer = new IndexWriter(
                    indexAt,
                    new SimpleAnalyzer(),
                    true,
                    IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var src = new DirectoryInfo(SourceRoot);

                    foreach (var file in src.EnumerateFiles("*.cs", SearchOption.AllDirectories))
                    {
                        // The text reader is consumed inside AddDocument, so it has to
                        // stay open until that call returns.
                        using (var text = File.OpenText(file.FullName))
                        {
                            var doc = new Document();
                            doc.Add(new Field(ContentsField, text));
                            doc.Add(new Field(TitleField, file.FullName, Field.Store.YES, Field.Index.ANALYZED));
                            indexer.AddDocument(doc);
                        }
                    }

                    indexer.Optimize();
                    Console.WriteLine("Total number of files indexed : " + indexer.MaxDoc());
                }

                // The term is lower-cased because SimpleAnalyzer lower-cases tokens
                // at index time; a mixed-case term would never match.
                var term = new Term(ContentsField, args[0].ToLower());

                using (var searcher = IndexReader.Open(indexAt, true))
                using (var pos = searcher.TermPositions(term))
                {
                    // Each Next() advances to the next document containing the term.
                    while (pos.Next())
                    {
                        Console.WriteLine("Match in document " + searcher.Document(pos.Doc).GetValues(TitleField).FirstOrDefault());
                    }
                }
            }
        }
    }
}

No comments:

Post a Comment