From 744e6e3248ae4b2f60fac606375eaf1db8828723 Mon Sep 17 00:00:00 2001 From: Nick Date: Sat, 21 Sep 2019 10:36:09 -0400 Subject: [PATCH] fix: restore unicode chars for search content --- package.json | 1 + server/models/pages.js | 8 +++++--- yarn.lock | Bin 603240 -> 603250 bytes 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 5c1807da..502076d6 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "graphql-rate-limit-directive": "1.1.0", "graphql-subscriptions": "1.1.0", "graphql-tools": "4.0.5", + "he": "1.2.0", "highlight.js": "9.15.10", "i18next": "17.0.15", "i18next-express-middleware": "1.8.2", diff --git a/server/models/pages.js b/server/models/pages.js index 929589e7..f39cd94f 100644 --- a/server/models/pages.js +++ b/server/models/pages.js @@ -7,6 +7,7 @@ const fs = require('fs-extra') const yaml = require('js-yaml') const striptags = require('striptags') const emojiRegex = require('emoji-regex') +const he = require('he') /* global WIKI */ @@ -17,7 +18,7 @@ const frontmatterRegex = { } const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig -const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig +// const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig /** * Pages model @@ -663,9 +664,10 @@ module.exports = class Page extends Model { * @returns {string} Cleaned Content Text */ static cleanHTML(rawHTML = '') { - return striptags(rawHTML || '') + let data = striptags(rawHTML || '') .replace(emojiRegex(), '') - .replace(htmlEntitiesRegex, '') + // .replace(htmlEntitiesRegex, '') + return he.decode(data) .replace(punctuationRegex, ' ') .replace(/(\r\n|\n|\r)/gm, ' ') .replace(/\s\s+/g, ' ') diff --git a/yarn.lock b/yarn.lock index 8c1aecc1efb8794709e1bd91df310159a921e28d..8e785f50549d4df070abbc412eb66266c840eeab 100644 GIT binary patch delta 51 zcmaFyLgmv7m4+6^7N!>FEi4+^?1p+qdImZQ?W)==K+FonY(UHo#2njIwK<(R0k?<^ A{{R30 delta 43 scmezLLgmE^m4+6^7N!>FEi4+^?MB)xK+FonY(UHo#2nj=v^gC(0bQXD2mk;8