Issue:
Where can I find examples of JavaScript query stages and index stages?
Environment:
Fusion
Resolution:
Several examples are collected below.
Index Stage Examples
Git repository with some examples
https://github.com/kmcowan/Lucidworks_Fusion_NashornJS
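Before the individual examples, here is the general shape that the index-stage scripts below follow. This is a sketch, not a definitive contract: the exact signature and the bindings available to the script (such as logger) can vary by Fusion version.

function (doc) {
  // 'doc' is the PipelineDocument flowing through the index pipeline.
  logger.info("Processing document {}", doc.getId());
  // Return the (possibly modified) document, an array of documents,
  // or null to drop the document.
  return doc;
}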
Add a new field
function (doc) {
  doc.addField('some-new-field', 'some-value');
  return doc;
}
"Glue" two fields together - here we create a combined geo-coordinate field from separate latitude and longitude fields:
function (doc) {
  logger.debug("Appending Long to Lat for new field");
  var value = "";
  if (doc.hasField("Actor1Geo_Lat") && doc.hasField("Actor1Geo_Long")) {
    value = doc.getFirstFieldValue("Actor1Geo_Lat") + "," + doc.getFirstFieldValue("Actor1Geo_Long");
    doc.addField("Actor1Geo_p", value);
  }
  if (doc.hasField("Actor2Geo_Lat") && doc.hasField("Actor2Geo_Long")) {
    value = doc.getFirstFieldValue("Actor2Geo_Lat") + "," + doc.getFirstFieldValue("Actor2Geo_Long");
    doc.addField("Actor2Geo_p", value);
  }
  if (doc.hasField("ActionGeo_Lat") && doc.hasField("ActionGeo_Long")) {
    value = doc.getFirstFieldValue("ActionGeo_Lat") + "," + doc.getFirstFieldValue("ActionGeo_Long");
    doc.addField("ActionGeo_p", value);
  }
  return doc;
}
Parse a JSON document with logging
var imports = new JavaImporter(Packages.sun.org.mozilla.javascript.internal.json.JsonParser);

function (doc) {
  with (imports) {
    var myData = JSON.parse(doc.getFirstFieldValue('body'));
    logger.info("parsed object");
    for (var index in myData) {
      var entity = myData[index];
      if (index == "track_id") {
        doc.addField("trackId_s", entity);
      } else if (index == "title") {
        doc.addField("title_txt", entity);
      } else if (index == "artist") {
        doc.addField("artist_s", entity);
      } else if (index == "tags") {
        for (var i = 0; i < entity.length; i++) {
          var tag = entity[i][0];
          doc.addField("tag_ss", tag);
        }
      }
    }
  }
  doc.removeFields("body");
  return doc;
}
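A note on the JavaImporter line above: it pulls in a Rhino-era internal JSON parser and is only needed on older JVMs. On the Nashorn engine that ships with Java 8, JSON is a built-in global, so the same parse works without any import. A minimal sketch, with field names taken from the example above:

function (doc) {
  // JSON.parse is built into Nashorn; no Java import required.
  var myData = JSON.parse(doc.getFirstFieldValue('body'));
  doc.addField('trackId_s', myData.track_id);
  return doc;
}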
Modify a document to split it into multiple documents
function (doc) {
  var field = doc.getFieldValues('price');
  var id = doc.getId();
  var newDocs = [];
  for (var i = 0; i < field.size(); i++) {
    newDocs.push({
      'id': id + '-' + i,
      'fields': [
        { 'name': 'subject', 'value': field.get(i) }
      ]
    });
  }
  return newDocs;
}
Create and return a PipelineDocument
function (doc) {
  var price = doc.getFieldValues("price");
  var id = doc.getId();
  var newDocs = [];
  for (var i = 0; i < price.size(); i++) {
    var pd = new com.lucidworks.apollo.common.pipeline.PipelineDocument(id + '-' + i);
    pd.addField('subject', price.get(i));
    newDocs.push(pd);
  }
  return newDocs;
}
A function that iterates through all fields in a PipelineDocument object
function (doc) {
  var fields = doc.getFieldNames().toArray();
  for (var i = 0; i < fields.length; i++) {
    if (fields[i].indexOf("ATTR_NAME_") > -1) {
      //logger.info(fields[i]);
      var index = fields[i].replace("ATTR_NAME_", "");
      var newFieldName = doc.getFirstFieldValue(fields[i]);
      var newFieldValue = doc.getFirstFieldValue("ATTR_VALUE_" + index);
      doc.addField(newFieldName, newFieldValue);
      logger.info(newFieldName);
      doc.removeFields(fields[i]);
      doc.removeFields("ATTR_VALUE_" + index);
    }
  }
  return doc;
}
Reject the document by returning a null or empty array
function (doc) {
  if (doc.hasValue('foo')) {
    return null; // stop this document from being indexed
  }
  return doc;
}
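As the heading notes, an empty array rejects the document just as null does; a minimal variant (the field name 'foo' is just a placeholder):

function (doc) {
  if (doc.hasValue('foo')) {
    return []; // an empty array also drops the document
  }
  return doc;
}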
Query Stage Examples
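For orientation, here is a minimal query-stage skeleton. The five-argument signature mirrors the security-trimming example below; which arguments your Fusion version actually passes may differ, so treat this as a sketch rather than a definitive contract.

function (req, res, ctx, coll, solr) {
  // 'req' carries the query parameters; the example below also shows
  // req.removeParam(...) and req.putParams(...) for rewriting them.
  var fq = req.getParam("fq"); // may hold multiple values
  logger.info("Incoming fq: {}", fq);
}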
Modify the user security-trimming filter
In this example, the user ACLs in SharePoint are generated with SiteMinder prefixes. For datasource ds_SP_TESTST only, we want to change any ALLOW_USER or DENY_USER ACLs for the ACMENET domain to add a SiteMinder header to the query.
function (req, res, ctx, coll, solr) {
  var spTrimName = "ds_SP_TESTST";
  var oldFq = req.getParam(spTrimName);
  var newFq = [];
  logger.info(" ============= In javascript stage fq length {}", oldFq.length);
  for (var i = 0; i < oldFq.length; ++i) {
    logger.info(" ============= The javascript stage before {} = {}", i, oldFq[i]);
    var xSpl = oldFq[i].split(" ");
    var newval = "";
    for (var j = 0; j < xSpl.length; ++j) {
      var nextVal = xSpl[j];
      if (newval != "") {
        newval += " ";
      }
      if (nextVal.indexOf("acls_ss:SP_ALLOW_USER_ACMENET\\\\") == 0) {
        var userId = nextVal.replace("acls_ss:SP_ALLOW_USER_ACMENET\\\\", "");
        newval += "acls_ss:SP_ALLOW_USER_ACMENET\\\\*|SITEMINDERSTS|" + userId + "@ACMENET.NET";
      } else if (nextVal.indexOf("-acls_ss:SP_DENY_USER_ACMENET\\\\") == 0) {
        var userId = nextVal.replace("-acls_ss:SP_DENY_USER_ACMENET\\\\", "");
        newval += "-acls_ss:SP_DENY_USER_ACMENET\\\\*|SITEMINDERSTS|" + userId + "@ACMENET.NET";
      } else {
        newval += nextVal;
      }
    }
    logger.info(" ============= The javascript stage after: {}", newval);
    newFq.push(newval);
  }
  req.removeParam(spTrimName);
  req.putParams(spTrimName, newFq);
}
Generic Examples (Can be used on either Index or Query stages)
Run a SolrCloud streaming query (CloudSolrStream) and use the results in your JavaScript stage
function (doc) {
  var HashMap = java.util.HashMap;
  var CloudSolrStream = org.apache.solr.client.solrj.io.stream.CloudSolrStream;

  var zkHost = "localhost:9983";
  var collection = "fbo_test";
  var cstream = null;
  var props = new HashMap();
  try {
    props.put("q", "*:*");
    props.put("qt", "/export");
    props.put("sort", "id asc");
    props.put("fl", "id");
    props.put("rows", "20");

    cstream = new CloudSolrStream(zkHost, collection, props);
    cstream.open();
    while (true) {
      var tuple = cstream.read();
      if (tuple.EOF) {
        logger.info("BREAK");
        break;
      }
      var fieldA = tuple.getString("id");
      // Use the tuple data here
      logger.info(fieldA);
    }
  } catch (e) {
    logger.error(e);
  } finally {
    if (cstream !== null) {
      cstream.close(); // release the stream's resources
    }
  }
  return doc;
}
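Note that the /export request handler streams the complete result set and requires the returned and sort fields to have docValues enabled in the Solr schema; the zkHost and collection values above are placeholders for your own environment.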
Tika Parser JS Example
function (doc) {
  var Tika = org.apache.tika.Tika;
  var Metadata = org.apache.tika.metadata.Metadata;
  var AutoDetectParser = org.apache.tika.parser.AutoDetectParser;
  var ParseContext = org.apache.tika.parser.ParseContext;
  var OOXMLParser = org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
  var PDFParser = org.apache.tika.parser.pdf.PDFParser;
  var BodyContentHandler = org.apache.tika.sax.BodyContentHandler;
  var URL = java.net.URL;
  var base64 = java.util.Base64;

  var url = doc.getId();
  var autoParser = new AutoDetectParser();
  var tika = new Tika();
  var pdfParser = new PDFParser();
  // Get ready to parse the file.
  var textHandler = new BodyContentHandler(-1); // -1 disables the write limit
  var metadata = new Metadata();
  var context = new ParseContext();
  var metadataNames = null;
  var content = null;
  var input = null;
  logger.info("*** BEGIN TIKA PARSE *** ");
  try {
    var urlobj = new URL(url);
    input = urlobj.openStream();
    if ("application/pdf".equals(tika.detect(urlobj))) {
      pdfParser.parse(input, textHandler, metadata, context);
      metadataNames = metadata.names();
      content = textHandler.toString();
    } else if ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet".equals(tika.detect(urlobj))) {
      logger.info("Using XLSX parser...");
      var msofficeparser = new OOXMLParser();
      msofficeparser.parse(input, textHandler, metadata, context);
      content = textHandler.toString();
      // logger.info("Contents of the document:" + content);
      logger.info("Metadata of the document:");
      metadataNames = metadata.names();
    } else {
      autoParser.parse(input, textHandler, metadata, context);
      metadataNames = metadata.names();
      content = textHandler.toString();
    }
    if (content !== null) {
      var encoder = base64.getEncoder();
      var encoded = encoder.encodeToString(content.getBytes());
      if (!doc.hasField("_raw_content_")) {
        doc.addField("_raw_content_", encoded);
      } else {
        doc.setField("_raw_content_", encoded);
      }
    }
    if (metadataNames !== null) {
      // Iterate by index: for-in over a Java array yields indices, not values.
      for (var i = 0; i < metadataNames.length; i++) {
        var name = metadataNames[i];
        logger.info(name + ": " + metadata.get(name));
        doc.addField(name, metadata.get(name));
      }
    }
  } catch (e) {
    logger.error(e);
  } finally {
    if (input !== null) {
      input.close(); // release the URL stream
    }
  }
  return doc;
}
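This example assumes the document ID is a URL the stage can fetch directly, and that the Tika classes referenced at the top of the script are available on the stage's classpath in your Fusion version.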