From 343d4db0b36f724cb8ff424b40c1637f0f9283c4 Mon Sep 17 00:00:00 2001 From: Nick Date: Sun, 17 Mar 2019 21:52:16 -0400 Subject: [PATCH] feat: algolia search engine --- package.json | 1 + server/modules/search/algolia/definition.yml | 2 +- server/modules/search/algolia/engine.js | 204 +++++++++++++++++-- server/modules/search/aws/engine.js | 13 +- server/modules/search/azure/engine.js | 6 +- yarn.lock | Bin 576394 -> 579986 bytes 6 files changed, 203 insertions(+), 23 deletions(-) diff --git a/package.json b/package.json index f2380fd6..a45fd510 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ }, "dependencies": { "@bugsnag/js": "5.2.0", + "algoliasearch": "3.32.1", "apollo-fetch": "0.7.0", "apollo-server": "2.3.3", "apollo-server-express": "2.3.3", diff --git a/server/modules/search/algolia/definition.yml b/server/modules/search/algolia/definition.yml index 7edc7bb4..9976925a 100644 --- a/server/modules/search/algolia/definition.yml +++ b/server/modules/search/algolia/definition.yml @@ -4,7 +4,7 @@ description: Algolia is a powerful search-as-a-service solution, made easy to us author: requarks.io logo: https://static.requarks.io/logo/algolia.svg website: https://www.algolia.com/ -isAvailable: false +isAvailable: true props: appId: type: String diff --git a/server/modules/search/algolia/engine.js b/server/modules/search/algolia/engine.js index e7369ccd..217d2632 100644 --- a/server/modules/search/algolia/engine.js +++ b/server/modules/search/algolia/engine.js @@ -1,26 +1,202 @@ +const _ = require('lodash') +const algoliasearch = require('algoliasearch') +const { pipeline, Transform } = require('stream') + +/* global WIKI */ + module.exports = { - activate() { - + async activate() { + // not used }, - deactivate() { - + async deactivate() { + // not used }, - query() { + /** + * INIT + */ + async init() { + WIKI.logger.info(`(SEARCH/ALGOLIA) Initializing...`) + this.client = algoliasearch(this.config.appId, this.config.apiKey) + this.index = this.client.initIndex(this.config.indexName) + // -> Create Search Index + WIKI.logger.info(`(SEARCH/ALGOLIA) Setting index configuration...`) + await this.index.setSettings({ + searchableAttributes: [ + 'title', + 'description', + 'content' + ], + attributesToRetrieve: [ + 'locale', + 'path', + 'title', + 'description' + ], + advancedSyntax: true + }) + WIKI.logger.info(`(SEARCH/ALGOLIA) Initialization completed.`) }, - created() { - + /** + * QUERY + * + * @param {String} q Query + * @param {Object} opts Additional options + */ + async query(q, opts) { + try { + const results = await this.index.search({ + query: q, + hitsPerPage: 50 + }) + return { + results: _.map(results.hits, r => ({ + id: r.objectID, + locale: r.locale, + path: r.path, + title: r.title, + description: r.description + })), + suggestions: [], + totalHits: results.nbHits + } + } catch (err) { + WIKI.logger.warn('Search Engine Error:') + WIKI.logger.warn(err) + } }, - updated() { - + /** + * CREATE + * + * @param {Object} page Page to create + */ + async created(page) { + await this.index.addObject({ + objectID: page.hash, + locale: page.localeCode, + path: page.path, + title: page.title, + description: page.description, + content: page.content + }) }, - deleted() { - + /** + * UPDATE + * + * @param {Object} page Page to update + */ + async updated(page) { + await this.index.partialUpdateObject({ + objectID: page.hash, + title: page.title, + description: page.description, + content: page.content + }) }, - renamed() { - + /** + * DELETE + * + * @param {Object} page Page to delete + */ + async deleted(page) { + await this.index.deleteObject(page.hash) }, - rebuild() { + /** + * RENAME + * + * @param {Object} page Page to rename + */ + async renamed(page) { + await this.index.deleteObject(page.sourceHash) + await this.index.addObject({ + objectID: page.destinationHash, + locale: page.localeCode, + path: page.destinationPath, + title: page.title, + description: page.description, + content: page.content + }) + }, + /** + * REBUILD INDEX + */ + async rebuild() { + WIKI.logger.info(`(SEARCH/ALGOLIA) Rebuilding Index...`) + await this.index.clearIndex() + const MAX_DOCUMENT_BYTES = 10 * Math.pow(2, 10) // 10 KB + const MAX_INDEXING_BYTES = 10 * Math.pow(2, 20) - Buffer.from('[').byteLength - Buffer.from(']').byteLength // 10 MB + const MAX_INDEXING_COUNT = 1000 + const COMMA_BYTES = Buffer.from(',').byteLength + + let chunks = [] + let bytes = 0 + + const processDocument = async (cb, doc) => { + try { + if (doc) { + const docBytes = Buffer.from(JSON.stringify(doc)).byteLength + // -> Document too large + if (docBytes >= MAX_DOCUMENT_BYTES) { + throw new Error('Document exceeds maximum size allowed by Algolia.') + } + + // -> Current batch exceeds size hard limit, flush + if (docBytes + COMMA_BYTES + bytes >= MAX_INDEXING_BYTES) { + await flushBuffer() + } + + if (chunks.length > 0) { + bytes += COMMA_BYTES + } + bytes += docBytes + chunks.push(doc) + + // -> Current batch exceeds count soft limit, flush + if (chunks.length >= MAX_INDEXING_COUNT) { + await flushBuffer() + } + } else { + // -> End of stream, flush + await flushBuffer() + } + cb() + } catch (err) { + cb(err) + } + } + + const flushBuffer = async () => { + WIKI.logger.info(`(SEARCH/ALGOLIA) Sending batch of ${chunks.length}...`) + try { + await this.index.addObjects( + _.map(chunks, doc => ({ + objectID: doc.id, + locale: doc.locale, + path: doc.path, + title: doc.title, + description: doc.description, + content: doc.content + })) + ) + } catch (err) { + WIKI.logger.warn('(SEARCH/ALGOLIA) Failed to send batch to Algolia: ', err) + } + chunks.length = 0 + bytes = 0 + } + + await pipeline( + WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'content').select().from('pages').where({ + isPublished: true, + isPrivate: false + }).stream(), + new Transform({ + objectMode: true, + transform: async (chunk, enc, cb) => processDocument(cb, chunk), + flush: async (cb) => processDocument(cb) + }) + ) + WIKI.logger.info(`(SEARCH/ALGOLIA) Index rebuilt successfully.`) } } diff --git a/server/modules/search/aws/engine.js b/server/modules/search/aws/engine.js index 916cc572..599f1bae 100644 --- a/server/modules/search/aws/engine.js +++ b/server/modules/search/aws/engine.js @@ -2,6 +2,8 @@ const _ = require('lodash') const AWS = require('aws-sdk') const { pipeline, Transform } = require('stream') +/* global WIKI */ + module.exports = { async activate() { // not used @@ -110,12 +112,12 @@ module.exports = { rebuildIndex = true } - //-> Define suggester + // -> Define suggester const suggesters = await this.client.describeSuggesters({ DomainName: this.config.domain, SuggesterNames: ['default_suggester'] }).promise() - if(_.get(suggesters, 'Suggesters', []).length < 1) { + if (_.get(suggesters, 'Suggesters', []).length < 1) { WIKI.logger.info(`(SEARCH/AWS) Defining Suggester...`) await this.client.defineSuggester({ DomainName: this.config.domain, @@ -323,7 +325,7 @@ module.exports = { const flushBuffer = async () => { WIKI.logger.info(`(SEARCH/AWS) Sending batch of ${chunks.length}...`) try { - const resp = await this.clientDomain.uploadDocuments({ + await this.clientDomain.uploadDocuments({ contentType: 'application/json', documents: JSON.stringify(_.map(chunks, doc => ({ type: 'add', @@ -351,8 +353,8 @@ module.exports = { }).stream(), new Transform({ objectMode: true, - transform: async (chunk, enc, cb) => await processDocument(cb, chunk), - flush: async (cb) => await processDocument(cb) + transform: async (chunk, enc, cb) => processDocument(cb, chunk), + flush: async (cb) => processDocument(cb) }) ) @@ -364,4 +366,3 @@ module.exports = { WIKI.logger.info(`(SEARCH/AWS) Index rebuilt successfully.`) } } - diff --git a/server/modules/search/azure/engine.js b/server/modules/search/azure/engine.js index 0a9c0ef7..f242df58 100644 --- a/server/modules/search/azure/engine.js +++ b/server/modules/search/azure/engine.js @@ -3,6 +3,8 @@ const { SearchService, QueryType } = require('azure-search-client') const request = require('request-promise') const { pipeline } = require('stream') +/* global WIKI */ + module.exports = { async activate() { // not used @@ -20,7 +22,7 @@ module.exports = { // -> Create Search Index const indexes = await this.client.indexes.list() if (!_.find(_.get(indexes, 'result.value', []), ['name', this.config.indexName])) { - WIKI.logger.info(`(SEARCH/AWS) Creating index...`) + WIKI.logger.info(`(SEARCH/AZURE) Creating index...`) await this.client.indexes.create({ name: this.config.indexName, fields: [ @@ -74,7 +76,7 @@ module.exports = { searchMode: 'analyzingInfixMatching', sourceFields: ['title', 'description', 'content'] } - ], + ] }) } WIKI.logger.info(`(SEARCH/AZURE) Initialization completed.`) diff --git a/yarn.lock b/yarn.lock index a7a7a3fca93c03bf5e78ab78c82a8a03b2b77bdc..921b3e569e52f4a5a73bb2be0b0b03e843132756 100644 GIT binary patch delta 2421 zcmY*aON<;x8P>dQqu4R_uGhvfi+w4&&xZzv-8-U=dMshbyxLl@AORX zbkFR}tRS)lAyR~h6-%ig6d^|mis0fv#7Ma$2Z)dmgj^7gA`l3P1LTll*Y|(l_dn0Bz4*&(x8CwSyTk(d$a(gkvMveb3o`O9Onu2si{?Mx-ttV~Kr`@n^R9>uP)e*-2U!yg6CvHod)qKxgw){^n$|9!{#UNIreQ$j#CRV*5Uf_0T( z5u1`EBS8SBL;wb&4poUlmhwj~)JquWi@HWTlR4^+b`%*p|*}Q(XBBo zl?Q`rZ&=Lhl|yVsm94?%mQ=7EPv2?gMtxly$fN3Rv5|EvwY*45cYRAM%y~5wI@6)_ z5Zl%fUA_7=$Bg}ZDewhCdq0Q_MiRB>{c)w%MzzMpXBv>G>r2I{GE{rDWWJMVknZKnu?}j0%Zsk zVrmd!9SZ_B2m(xaC%ANZZ?e&a`PzmicL$|fT8q}O>+HCt-E@0x`uzv~pI!~kab{;< z2rsi2-VY{H3*dR^#UMV1c^=q=vsgse2vUKdOBCsnqUuNhqDr6%bfgNJsjz#izUBHx zVW-h>QoF8nxG|O{8_kk;taZzt?u_kH%Ni=RQo2XG$wG2i%ahLJxYf)a=Q9PRUUTez zT5sd@F&J#;y)4`<*&Y~fY#dm3Z!fQ~SDpx*VmGW{ zEun4_!F2k~os@00fxAzJwM?EWUSm*@x2Nw#0zZtNh^^8&ZJG0nNR~Yb0-@?;15whXJMN}((L}rf#4dpdCY}i zI0?91<{_zqG*uIkiqVXLP}P8{3lPXUw+;hPs7X{KU~XlXWU;%s=^T2bSl-NRs_lCCyp`fE|kH&NqZz|u5*KWT+`3YE-OqQyb|b5gV=TPOK(Lk zviR@9G4{t=@bh4XhBQwNs#MVpT?Z!QGSC$bLPJG-sA>`+rU4aI&~t;Lc&GnI%u@73ks4L}~ zR(89cRBdmEsSK<2!)OIpszc8x>V@sKDS0FKRPXfvu)e|VPGCRysy|hHyaYUx$3HpI zX7!k3tLQ{jMG8$>l|)KJP1bcqQVl{hQ!q{5NK{cm?2|u5&RrJbR4ZhpWHYxlC^Uw! zT7-M58MP*v9f$d@_%5*fABTfa%xrINhcg?WetS9e>u;SFVyljRK#f7XLnj_9|2?q6 z?q`BAcK7lyNY~8HiEF0r-YE(aLudO{78-=E^QTRFx&e+O+^5xJ9`^G2`WADBZj(m*+ z`jE%mzvYJt;>}+rDpo0!43T$TfHi@eDoD_DlhX|#KxGBX22ypd8wwQxafe3&`%=ljTpPEP zo8t~mI{C7^VeVBsieevCa8l6^iu+o6nCVF6veS}VwU%2{Jg^rnb}D8yYaWBX;%Y*A ze}BE|scp+jHgct;a=f3Z+U1BpJjIC%reE4Tfn17)|jU|s|xQ@2mD1Q%|X(jf&Dw{jr`t6+ybg9W!d kg9a8ew-UDo$1ax$SOh({F6IUaFSlD32c6`%2>S<$-tF=-DF6Tf