Ho cercato di implementare Lucene per rendere più veloce la ricerca sul mio sito web.Utilizzo di Apache Lucene per la ricerca
Il mio codice funziona attualmente, tuttavia, penso di non utilizzare correttamente Lucene. In questo momento, la mia query di ricerca è productName:asterisk(input)asterisk
- Non riesco a immaginare questo è ciò che si dovrebbe fare per trovare tutti i prodotti in cui productName
contiene input
. Penso che abbia qualcosa a che fare con il modo in cui salvi i campi su un documento.
Il mio codice:
LuceneHelper.cs
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data.Entity.Migrations.Model;
using System.Linq;
using System.Threading.Tasks;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Rentpro.Models;
using RentPro.Models.Tables;
using RentProModels.Models;
namespace RentPro.Helpers
{
public class LuceneHelper
{
private const Lucene.Net.Util.Version Version = Lucene.Net.Util.Version.LUCENE_30;
private bool IndicesInitialized;
private List<Language> Languages = new List<Language>();
public void BuildIndices(DB db)
{
Languages = GetLanguages(db);
Analyzer analyzer = new StandardAnalyzer(Version);
List<Product> allProducts = db.GetAllProducts(true, false);
foreach (Language l in Languages)
{
BuildIndicesForLanguage(allProducts, analyzer, l.ID);
}
IndicesInitialized = true;
}
private void BuildIndicesForLanguage(List<Product> products, Analyzer analyzer, int id = 0)
{
using (
IndexWriter indexWriter = new IndexWriter(GetDirectory(id), analyzer,
IndexWriter.MaxFieldLength.UNLIMITED))
{
var x = products.Count;
foreach (Product p in products)
{
SearchProduct product = SearchProduct.FromProduct(p, id);
Document document = new Document();
Field productIdField = new Field("productId", product.ID.ToString(), Field.Store.YES, Field.Index.NO);
Field productTitleField = new Field("productName", product.Name, Field.Store.YES, Field.Index.ANALYZED);
Field productDescriptionField = new Field("productDescription", product.Description, Field.Store.YES, Field.Index.ANALYZED);
Field productCategoryField = new Field("productCategory", product.Category, Field.Store.YES, Field.Index.ANALYZED);
Field productCategorySynonymField = new Field("productCategorySynonym", product.CategorySynonym, Field.Store.YES, Field.Index.ANALYZED);
Field productImageUrlField = new Field("productImageUrl", product.ImageUrl, Field.Store.YES, Field.Index.NO);
Field productTypeField = new Field("productType", product.Type, Field.Store.YES, Field.Index.NO);
Field productDescriptionShortField = new Field("productDescriptionShort", product.DescriptionShort, Field.Store.YES, Field.Index.NO);
Field productPriceField = new Field("productPrice", product.Price, Field.Store.YES, Field.Index.NO);
document.Add(productIdField);
document.Add(productTitleField);
document.Add(productDescriptionField);
document.Add(productCategoryField);
document.Add(productCategorySynonymField);
document.Add(productImageUrlField);
document.Add(productTypeField);
document.Add(productDescriptionShortField);
document.Add(productPriceField);
indexWriter.AddDocument(document);
}
indexWriter.Optimize();
indexWriter.Commit();
}
}
public List<SearchProduct> Search(string input)
{
if (!IndicesInitialized)
{
BuildIndices(new DB());
return Search(input);
}
IndexReader reader = IndexReader.Open(GetCurrentDirectory(), true);
Searcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer(Version);
TopScoreDocCollector collector = TopScoreDocCollector.Create(100, true);
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version,
new[] { "productDescription", "productCategory", "productCategorySynonym", "productName" }, analyzer)
{
AllowLeadingWildcard = true
};
searcher.Search(parser.Parse("*" + input + "*"), collector);
ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
List<int> productIds = new List<int>();
List<SearchProduct> results = new List<SearchProduct>();
foreach (ScoreDoc scoreDoc in hits)
{
Document document = searcher.Doc(scoreDoc.Doc);
int productId = int.Parse(document.Get("productId"));
if (!productIds.Contains(productId))
{
productIds.Add(productId);
SearchProduct result = new SearchProduct
{
ID = productId,
Description = document.Get("productDescription"),
Name = document.Get("productName"),
Category = document.Get("productCategory"),
CategorySynonym = document.Get("productCategorySynonym"),
ImageUrl = document.Get("productImageUrl"),
Type = document.Get("productType"),
DescriptionShort = document.Get("productDescriptionShort"),
Price = document.Get("productPrice")
};
results.Add(result);
}
}
reader.Dispose();
searcher.Dispose();
analyzer.Dispose();
return results;
}
private string GetDirectoryPath(int languageId = 1)
{
return GetDirectoryPath(Languages.SingleOrDefault(x => x.ID == languageId).UriPart);
}
private string GetDirectoryPath(string languageUri)
{
return AppDomain.CurrentDomain.BaseDirectory + @"\App_Data\LuceneIndices\" + languageUri;
}
private List<Language> GetLanguages(DB db)
{
return db.Languages.ToList();
}
private int GetCurrentLanguageId()
{
return Translator.GetCurrentLanguageID();
}
private FSDirectory GetCurrentDirectory()
{
return FSDirectory.Open(GetDirectoryPath(GetCurrentLanguageId()));
}
private FSDirectory GetDirectory(int languageId)
{
return FSDirectory.Open(GetDirectoryPath(languageId));
}
}
public class SearchProduct
{
public int ID { get; set; }
public string Description { get; set; }
public string Name { get; set; }
public string ImageUrl { get; set; }
public string Type { get; set; }
public string DescriptionShort { get; set; }
public string Price { get; set; }
public string Category { get; set; }
public string CategorySynonym { get; set; }
public static SearchProduct FromProduct(Product p, int languageId)
{
return new SearchProduct()
{
ID = p.ID,
Description = p.GetText(languageId, ProductLanguageType.Description),
Name = p.GetText(languageId),
ImageUrl =
p.Images.Count > 0
? "/Company/" + Settings.Get("FolderName") + "/Pictures/Products/100x100/" +
p.Images.Single(x => x.Type == "Main").Url
: "",
Type = p is HuurProduct ? "HuurProduct" : "KoopProduct",
DescriptionShort = p.GetText(languageId, ProductLanguageType.DescriptionShort),
Price = p is HuurProduct ? ((HuurProduct)p).CalculatedPrice(1, !Settings.GetBool("BTWExLeading")).ToString("0.00") : "",
Category = p.Category.Name,
CategorySynonym = p.Category.Synonym
};
}
}
}
Come io chiamo la LuceneHelper:
public ActionResult Lucene(string SearchString, string SearchOrderBy, int? page, int? amount)
{
List<SearchProduct> searchResults = new List<SearchProduct>();
if (!SearchString.IsNullOrWhiteSpace())
{
LuceneHelper lucene = new LuceneHelper();
searchResults = lucene.Search(SearchString);
}
return View(new LuceneSearchResultsVM(db, SearchString, searchResults, SearchOrderBy, page ?? 1, amount ?? 10));
}
LuceneSearchResultsVM:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Dynamic;
using System.Web;
using RentPro.Models.Tables;
using System.Linq.Expressions;
using System.Reflection;
using Microsoft.Ajax.Utilities;
using Rentpro.Models;
using RentPro.Helpers;
using RentProModels.Models;
namespace RentPro.ViewModels
{
public class LuceneSearchResultsVM
{
public List<SearchProduct> SearchProducts { get; set; }
public bool BTWActive { get; set; }
public bool BTWEXInput { get; set; }
public bool BTWShow { get; set; }
public bool BTWExLeading { get; set; }
public string FolderName { get; set; }
public string CurrentSearchString { get; set; }
public string SearchOrderBy { get; set; }
public int Page;
public int Amount;
public String SearchQueryString {
get
{
return Translator.Translate("Zoekresultaten voor") + ": " + CurrentSearchString + " (" +
SearchProducts.Count + " " + Translator.Translate("resultaten") + " - " +
Translator.Translate("pagina") + " " + Page + " " + Translator.Translate("van") + " " +
CalculateAmountOfPages() + ")";
}
set { }
}
public LuceneSearchResultsVM(DB db, string queryString, List<SearchProduct> results, string searchOrderBy, int page, int amt)
{
BTWActive = Settings.GetBool("BTWActive");
BTWEXInput = Settings.GetBool("BTWEXInput");
BTWShow = Settings.GetBool("BTWShow");
BTWExLeading = Settings.GetBool("BTWExLeading");
FolderName = Settings.Get("FolderName");
SearchProducts = results;
CurrentSearchString = queryString;
if (searchOrderBy.IsNullOrWhiteSpace())
{
searchOrderBy = "Name";
}
SearchOrderBy = searchOrderBy;
Amount = amt == 0 ? 10 : amt;
int maxPages = CalculateAmountOfPages();
Page = page > maxPages ? maxPages : page;
SearchLog.MakeEntry(queryString, SearchProducts.Count(), db, HttpContext.Current.Request.UserHostAddress);
}
public List<SearchProduct> GetOrderedList()
{
List<SearchProduct> copySearchProductList = new List<SearchProduct>(SearchProducts);
copySearchProductList = copySearchProductList.Skip((Page - 1) * Amount).Take(Amount).ToList();
switch (SearchOrderBy)
{
case "Price":
copySearchProductList.Sort(new PriceSorter());
break;
case "DateCreated":
return copySearchProductList; //TODO
default:
return copySearchProductList.OrderBy(n => n.Name).ToList();
}
return copySearchProductList;
}
public int CalculateAmountOfPages()
{
int items = SearchProducts.Count;
return items/Amount + (items % Amount > 0 ? 1 : 0);
}
}
public class PriceSorter : IComparer<SearchProduct>
{
public int Compare(SearchProduct x, SearchProduct y)
{
if (x == null || x.Price == "") return 1;
if (y == null || y.Price == "") return -1;
decimal priceX = decimal.Parse(x.Price);
decimal priceY = decimal.Parse(y.Price);
return priceX > priceY ? 1 : priceX == priceY ? 0 : -1;
}
}
}
Qualsiasi aiuto sarebbe molto apprezzato.
lista di input Esempi di prodotti:
Query: SELECT Product.ID, Product.Decription, Product.Name FROM Product
SQL Server query equivalenti: SELECT Product.ID, Product.Decription, Product.Name FROM Product WHERE Product.Name LIKE '%Zelf%' OR Product.Decription LIKE '%Zelf%'
Fondamentalmente, Zelf
è l'ingresso . Voglio trovare tutte le corrispondenze con le descrizioni dei prodotti o i nomi dei prodotti che contengono la stringa di input.
Non c'è bisogno di aggiungere il nome del campo per interrogare il testo: 'Parser.parse ("PRODOTTO: *" + input + "*") ', perché lo hai già fatto qui:' new QueryParser (Version, "productName", analyzer) ' –
Sì, ma se non setto manualmente la query del parser a' productName: * input * 'restituisce solo corrispondenze esatte, non corrispondenze che contengono la stringa di input. – nbokmans
Cosa dire di '* input *' - without 'productName:'? –