A sample program that demonstrates a search operation over buckets and objects: it builds a Lucene.Net index over C# source files, persists each indexed document to an S3 bucket, and then prints the files that match the search term.
using System;
using System.IO;
using System.Linq;
using Amazon;                        // RegionEndpoint
using Amazon.S3;
using Amazon.S3.Model;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

namespace SourceSearch
{
    class Program
    {
        private const string bucketName = "ravi-rajamani-shared";
        private const string indexPath = @"C:\Code\Index2";
        private const string sourcePath = @"\code\API";
        private static readonly RegionEndpoint bucketRegion = RegionEndpoint.EUWest1;
        private static IAmazonS3 client;

        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Usage: SourceSearch <term>");
                return;
            }
            client = new AmazonS3Client(bucketRegion);
            var indexAt = SimpleFSDirectory.Open(new DirectoryInfo(indexPath));
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // Index every .cs file under sourcePath; 'true' creates a fresh index.
            using (var indexer = new IndexWriter(
                indexAt,
                analyzer,
                true,
                IndexWriter.MaxFieldLength.UNLIMITED))
            {
                var src = new DirectoryInfo(sourcePath);
                src.EnumerateFiles("*.cs", SearchOption.AllDirectories).ToList()
                    .ForEach(x =>
                    {
                        using (var reader = File.OpenText(x.FullName))
                        {
                            // Whitespace-tokenize and lower-case the file contents.
                            // The tee filter lets additional sinks share the same
                            // token stream if more fields are needed later.
                            var doc = new Document();
                            var tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader));
                            TokenStream final = new LowerCaseFilter(tee);
                            doc.Add(new Field("contents", final));
                            doc.Add(new Field("title", x.FullName, Field.Store.YES, Field.Index.ANALYZED));
                            indexer.AddDocument(doc);

                            // Persist the indexed document in the object store.
                            try
                            {
                                var putRequest = new PutObjectRequest
                                {
                                    BucketName = bucketName,
                                    // S3 keys conventionally use forward slashes.
                                    Key = x.FullName.Replace('\\', '/'),
                                    ContentBody = doc.ToString()
                                };
                                putRequest.Metadata.Add("x-amz-meta-title", x.FullName);
                                // Main is synchronous, so block on the async call.
                                PutObjectResponse response =
                                    client.PutObjectAsync(putRequest).GetAwaiter().GetResult();
                            }
                            catch (Exception e)
                            {
                                Console.WriteLine(
                                    "Error encountered when writing an object. Message: '{0}'",
                                    e.Message);
                            }
                        }
                    });
                indexer.Optimize();
                Console.WriteLine("Total number of files indexed: " + indexer.MaxDoc());
            }

            // Walk the postings for the search term and print each matching file.
            using (var reader = IndexReader.Open(indexAt, true))
            {
                var pos = reader.TermPositions(new Term("contents", args.First().ToLower()));
                while (pos.Next())
                {
                    Console.WriteLine("Match in document " +
                        reader.Document(pos.Doc).GetValues("title").FirstOrDefault());
                }
            }
        }
    }
}
// Reference: https://1drv.ms/w/s!Ashlm-Nw-wnWtyVeqoXu7U9zEKuT
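The listing above locates matches by walking term positions directly. A more typical Lucene.Net search path is QueryParser plus IndexSearcher, and since each indexed document was also persisted to S3, a hit can be pulled straight back from the bucket. The sketch below is illustrative rather than part of the original program: it assumes the index at C:\Code\Index2 and the ravi-rajamani-shared bucket were already populated by the listing above, and the SearchExample class name is hypothetical.

using System;
using System.IO;
using Amazon;
using Amazon.S3;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;

namespace SourceSearch
{
    // Hypothetical companion to the program above.
    class SearchExample
    {
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Usage: SearchExample <term>");
                return;
            }
            var indexAt = SimpleFSDirectory.Open(new DirectoryInfo(@"C:\Code\Index2"));
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            // The indexer lower-cased whitespace-delimited tokens, so a single
            // lower-cased term matches; multi-word queries may tokenize
            // differently under StandardAnalyzer.
            var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "contents", analyzer);
            var query = parser.Parse(args[0].ToLower());

            using (var searcher = new IndexSearcher(indexAt, true))
            {
                var hits = searcher.Search(query, 10);
                var client = new AmazonS3Client(RegionEndpoint.EUWest1);
                foreach (var scoreDoc in hits.ScoreDocs)
                {
                    var title = searcher.Doc(scoreDoc.Doc).Get("title");
                    Console.WriteLine("Match in document " + title);

                    // Fetch the persisted copy from the bucket; keys were stored
                    // with forward slashes.
                    using (var response = client.GetObjectAsync(
                        "ravi-rajamani-shared",
                        title.Replace('\\', '/')).GetAwaiter().GetResult())
                    {
                        Console.WriteLine("  stored object: {0} ({1} bytes)",
                            response.Key, response.ContentLength);
                    }
                }
            }
        }
    }
}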