Topic:
Examples of JavaScript query stages and index stages.
Environment:
Fusion 4 & 5
Example use cases:
There are several Index Stage and Query stage examples found here - https://github.com/kmcowan/Lucidworks_Fusion_NashornJS
Index stage examples
"Glue" two fields together - here we create a combined geo-coordinate field from separate latitude and longitude fields:
function(doc) {
logger.debug("Appending Long to Lat for new field");
var value = "";
if (doc.hasField("Actor1Geo_Lat") && doc.hasField("Actor1Geo_Long")) {
value = doc.getFirstFieldValue("Actor1Geo_Lat") + "," + doc.getFirstFieldValue("Actor1Geo_Long");
doc.addField("Actor1Geo_p", value);
}
if (doc.hasField("Actor2Geo_Lat") && doc.hasField("Actor2Geo_Long")) {
value = doc.getFirstFieldValue("Actor2Geo_Lat") + "," + doc.getFirstFieldValue("Actor2Geo_Long");
doc.addField("Actor2Geo_p", value);
}
if (doc.hasField("ActionGeo_Lat") && doc.hasField("ActionGeo_Long")) {
value = doc.getFirstFieldValue("ActionGeo_Lat") + "," + doc.getFirstFieldValue("ActionGeo_Long");
doc.addField("ActionGeo_p", value);
}
return doc;
}
Parse a JSON document with logging
var imports = new JavaImporter(Packages.sun.org.mozilla.javascript.internal.json.JsonParser);
function(doc) {
with (imports) {
myData = JSON.parse(doc.getFirstFieldValue('body'));
logger.info("parsed object");
for (var index in myData) {
var entity = myData[index];
if (index == "track_id") {
doc.addField("trackId_s",entity);
} else if (index == "title") {
doc.addField("title_txt",entity);
} else if (index == "artist") {
doc.addField("artist_s",entity)
} else if (index == "tags") {
for (var i=0; i<entity.length;i++) {
var tag = entity[i][0];
doc.addField("tag_ss",tag);
}
}
}
}
doc.removeFields("body");
return doc;
}
Modify a document to split it into multiple documents
function (doc) {
var field = doc.getFieldValues('price');
var id = doc.getId();
var newDocs = [];
for (i = 0; i < field.size(); i++) {
newDocs.push( { 'id' : id+'-'+i, 'fields' : [ {'name' : 'subject', 'value' : field.get(i) } ] } );
}
return newDocs;
}
Create and return a PipelineDocument
function (doc) {
var price = doc.getFieldValues("price");
var id = doc.getId();
var newDocs = [];
for (i = 0; i < price.size(); i++) {
var pd = new com.lucidworks.apollo.common.pipeline.PipelineDocument(id+'-'+i );
pd.addField('subject', price.get(i));
newDocs.push( pd );
}
return newDocs;
}
A function that iterates through all fields in a PipelineDocument object
function (doc) {
var fields = doc.getFieldNames().toArray();
for (var i=0;i < fields.length;i++) {
if (fields[i].indexOf("ATTR_NAME_") > -1) {
//logger.info(fields[i]);
var index = fields[i].replace("ATTR_NAME_","");
var newFieldName = doc.getFirstFieldValue(fields[i]);
var newFieldValue = doc.getFirstFieldValue("ATTR_VALUE_"+index);
doc.addField(newFieldName,newFieldValue);
logger.info(newFieldName);
doc.removeFields(fields[i]);
doc.removeFields("ATTR_VALUE_"+index);
}
}
return doc;
}
Reject the document by returning a null or empty array
function (doc) {
if (doc.hasValue('foo')) {
return null; // stop this document from being indexed.
}
return doc;
}
Query stage examples
Modify the user security trimming filter to be used
In this example, the user ACLs in SharePoint are generated with SiteMinder prefixes. For datasource ds_SP_TESTST only, we want to change any ALLOW_USER or DENY_USER ACLs for the ACMENET domain to add a SiteMinder header to the query.
function(req, res, ctx, coll, solr){
var spTrimName = "ds_SP_TESTST"
var oldFq = req.getParam(spTrimName)
var newFq = []
logger.info(" ============= In javascript stage fq length {}", oldFq.length)
for (var i=0; i<oldFq.length; ++i) {
logger.info(" ============= The javascript stage before {}} = {}", i, oldFq[i])
var xSpl = oldFq[i].split(" ")
var newval = ""
for (var j=0; j<xSpl.length; ++j) {
var nextVal = xSpl[j]
if (newval != "") {
newval += " "
}
if (nextVal.indexOf("acls_ss:SP_ALLOW_USER_ACMENET\\\\") == 0) {
var userId = nextVal.replace("acls_ss:SP_ALLOW_USER_ACMENET\\\\", "")
newval += "acls_ss:SP_ALLOW_USER_ACMENET\\\\*|SITEMINDERSTS|" + userId + "@ACMENET.NET"
} else if (nextVal.indexOf("-acls_ss:SP_DENY_USER_ACMENET\\\\") == 0) {
var userId = nextVal.replace("-acls_ss:SP_DENY_USER_ACMENET\\\\", "")
newval += "-acls_ss:SP_DENY_USER_ACMENET\\\\*|SITEMINDERSTS|" + userId + "@ACMENET.NET"
} else {
newval += nextVal
}
}
logger.info(" ============= The javascript stage after: {}", newval)
newFq.push(newval)
}
req.removeParam(spTrimName)
req.putParams(spTrimName, newFq)
}
Generic examples (can be used on either index or query stages)
Run a streaming query with Solr's CloudSolrStream and use the results in your JavaScript stage
function (doc) {
var HashMap = java.util.HashMap;
var Map = java.util.Map;
var Tuple = org.apache.solr.client.solrj.io.Tuple;
var CloudSolrStream = org.apache.solr.client.solrj.io.stream.CloudSolrStream;
var e = java.lang.Exception;
var cstream = org.apache.solr.client.solrj.io.stream.CloudSolrStream;
var props = java.util.Map;
var zkHost = "localhost:9983";
var collection = "fbo_test";
var cstream = null;
var props = new HashMap();
try {
props.put("q", "*:*");
props.put("qt", "/export");
props.put("sort", "id asc");
props.put("fl", "id");
props.put("rows", "20");
cstream = new CloudSolrStream(zkHost, collection, props);
cstream.open();
while(true) {
var tuple = cstream.read();
if(tuple.EOF) {
logger.info("BREAK");
break;
}
var fieldA = tuple.getString("id"); // Use the tuple data here
logger.info(fieldA);
}
} catch (e) {
logger.error(e);
}
return doc;
}
Tika parser JavaScript example:
function(doc) {
var File = java.io.File;
var FileInputStream = java.io.FileInputStream;
var IOException = java.io.IOException;
var InputStream = java.io.InputStream;
var HashMap = java.util.HashMap;
var Tika = org.apache.tika.Tika;
var Metadata = org.apache.tika.metadata.Metadata;
var AutoDetectParser = org.apache.tika.parser.AutoDetectParser;
var ParseContext = org.apache.tika.parser.ParseContext;
var OOXMLParser = org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
var PDFParser = org.apache.tika.parser.pdf.PDFParser;
var BodyContentHandler = org.apache.tika.sax.BodyContentHandler;
var xContentHandler = org.xml.sax.ContentHandler;
var String = java.lang.String;
var URL = java.net.URL;
var base64 = java.util.Base64;
var url = doc.getId();
var autoParser = new AutoDetectParser();
var tika = new Tika();
var pdfParser = new PDFParser();
// Get ready to parse the file.
var textHandler = new BodyContentHandler(-1);
var metadata = new Metadata();
var context = new ParseContext();
var map = new HashMap();
var ioe = java.io.IOException;
var metadataNames = Java.type("java.lang.String[]");
var content = new String();
logger.info("*** BEGIN TIKA PARSE *** ");
try {
var urlobj = new URL(url);
var input = urlobj.openStream();
if ("application/pdf".equals(tika.detect(urlobj))) {
pdfParser.parse(input, textHandler, metadata, context);
metadataNames = metadata.names();
content = textHandler.toString();
} else if ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet".equals(tika.detect(urlobj))) {
logger.info("Using XslX parser...");
var msofficeparser = new OOXMLParser();
msofficeparser.parse(input, textHandler, metadata, context);
content = textHandler.toString();
// logger.info("Contents of the document:" + content);
logger.info("Metadata of the document:");
metadataNames = metadata.names();
} else {
autoParser.parse(input, textHandler, metadata, context);
metadataNames = metadata.names();
content = textHandler.toString();
}
if (content !== null) {
var encoder = base64.getEncoder();
var encoded = encoder.encodeToString(content.getBytes());
if(!doc.hasField("_raw_content_")){
doc.addField("_raw_content_", encoded);
} else {
doc.setField("_raw_content_", encoded);
}
}
if (metadataNames !== null) {
for (var name in metadataNames) {
logger.info(name + ": " + metadata.get(name));
doc.addField(name, metadata.get(name));
}
}
} catch (ioe) {
logger.error(ioe);
}
return doc;
}
Comments
0 comments
Article is closed for comments.