UNDP-Accelerator-Labs · this-pama · Oct 12, 2023 · Jan 29, 2024
diff --git a/config/db/index.js b/config/db/index.js
@@ -1,4 +1,4 @@
-const logSQL = true
+const logSQL = false
 const initOptions = {
 	query(e) {
 		if (logSQL) console.log(e.query)

diff --git a/config/edit/index.js b/config/edit/index.js
@@ -9,9 +9,9 @@ exports.app_description = require('./translations.js').translations['app descrip
 
 // apps_in_suite NEED TO BE THE NAMES OF THE DIFFERENT DBs
 exports.apps_in_suite = [
-	{ name: 'Action Plans', key: process.env.NODE_ENV === 'production' ? 'action_plans_platform' : (process.env.DB_AP || 'ap_test_02'), baseurl: 'https://acclabs-actionlearningplans.azurewebsites.net/' },
-	{ name: 'Solutions Mapping', key: process.env.NODE_ENV === 'production' ? 'solutions_mapping_platform' : (process.env.DB_SM || 'sm_test_02'), baseurl: 'https://acclabs-solutionsmapping.azurewebsites.net/' },
-	{ name: 'Experiments', key: process.env.NODE_ENV === 'production' ? 'experiments_platform' : (process.env.DB_EXP || 'exp_test_02'), baseurl: 'https://acclabs-experiments.azurewebsites.net/' }
+	{ name: 'Action Plans', key: process.env.NODE_ENV === 'production' ? 'action_plans_platform' : (process.env.DB_AP || 'solutions_mapping_platform'), baseurl: 'https://acclabs-actionlearningplans.azurewebsites.net/' },
+	{ name: 'Solutions Mapping', key: process.env.NODE_ENV === 'production' ? 'solutions_mapping_platform' : (process.env.DB_SM || 'solutions_mapping_platform'), baseurl: 'https://acclabs-solutionsmapping.azurewebsites.net/' },
+	{ name: 'Experiments', key: process.env.NODE_ENV === 'production' ? 'experiments_platform' : (process.env.DB_EXP || 'solutions_mapping_platform'), baseurl: 'https://acclabs-experiments.azurewebsites.net/' }
 	// { name: 'Blogs', key: 'exp_test_02', baseurl: 'https://acclabs-blogs.azurewebsites.net/' },
 	// { name: 'Consent archive', key: 'exp_test_02', baseurl: 'https://acclabs-consent-archive.azurewebsites.net/' },
 	// { name: 'Buzz', key: 'exp_test_02', baseurl: 'https://acclabs-buzz.azurewebsites.net/' },

diff --git a/routes/browse/blog/query.js b/routes/browse/blog/query.js
@@ -1,5 +1,6 @@
 // Retrieve aggregate data from the database
 const { page_content_limit } = include('config/')
+const { parsers } = include('routes/helpers/')
 
 const theWhereClause = (country, type )=> {
   let whereClause = '';
@@ -24,19 +25,6 @@ const theWhereClause = (country, type )=> {
     return whereClause;
 }
 
-const searchTextConditionFn = (searchText) => {
-  let searchTextCondition = '';
-   if (searchText !== null && searchText !== undefined && searchText.length > 0) {
-    searchTextCondition = `
-      AND (title ~* '\\m${searchText}\\M'
-        OR content ~* '\\m${searchText}\\M'
-        OR all_html_content ~* '\\m${searchText}\\M')
-    `;
-  }
-
-  return searchTextCondition;
-}
-
 exports.blogAggQuery =`
     SELECT COUNT(*) AS totalBlogs
     FROM articles
@@ -58,53 +46,68 @@ exports.totalUnknownCountries = `
     FROM articles
     WHERE country IS NULL;
 `
-
 exports.searchBlogQuery = (searchText, page, country, type) => {
   let whereClause = theWhereClause(country, type);
   let values = [
     page_content_limit,
     (page - 1) * page_content_limit,
-    page,
   ];
+  const search = searchText ? parsers.regexQuery(searchText) : '';
   let searchTextCondition = '';
-   if (searchText !== null && searchText !== undefined && searchText.length > 0) {
+
+  let textColumn = "COALESCE(content, all_html_content)";
+
+  if (searchText !== null && searchText !== undefined && searchText.length > 0) {
     searchTextCondition = `
-      AND (title ~* ('\\m' || $3::TEXT || '\\M')
-        OR content ~* ('\\m' || $3::TEXT || '\\M')
-        OR all_html_content ~* ('\\m' || $3::TEXT || '\\M')
-        OR country ~* ('\\m' || $3::TEXT || '\\M'))
+      AND (title ~* ('\\m' || $3 || '\\M')
+        OR ${textColumn} ~* ('\\m' || $3 || '\\M')
+        OR country ~* ('\\m' || $3 || '\\M'))
     `;
-     values.splice(2, 0, searchText);
+
+    values.splice(2, 0, search);
+  } else {
+    searchTextCondition = `
+      AND (article_type = 'blog' 
+        AND (${textColumn} IS NOT NULL)
+        AND title IS NOT NULL)
+    `;
+    values.splice(2, 0);
   }
-   return {
+
+  return {
     text: `
-      WITH search_results AS (
-        SELECT id, url, content, country, article_type, title, posted_date, posted_date_str, language, created_at, all_html_content
-        FROM articles
-        WHERE has_lab IS TRUE
-        ${searchTextCondition}
-        ${whereClause}
-        ORDER BY posted_date DESC
-        LIMIT $1 OFFSET $2
-      ),
-      total_count AS (
-        SELECT COUNT(*) AS total_records
-        FROM articles
-        WHERE has_lab IS TRUE
+      SELECT id, url, country, article_type, title, posted_date, 
+             posted_date_str, language, created_at, 
+               regexp_replace(
+                 regexp_replace(${textColumn}, E'\\n', ' ', 'g'),
+                 E'\\s+', ' ', 'g'
+               ) AS content
+      FROM articles
+      WHERE has_lab IS TRUE
         ${searchTextCondition}
         ${whereClause}
-      )
-      SELECT sr.*, tc.total_records, (CEIL(tc.total_records::numeric / $1)) AS total_pages, ${searchTextCondition ? '$4' : '$3'}  AS current_page
-      FROM search_results sr
-      CROSS JOIN total_count tc;
+      LIMIT $1
+      OFFSET $2;
     `,
     values,
   };
-};
+}  
+
 
 exports.articleGroup = (searchText, country, type) => {
   let whereClause = theWhereClause(country, type);
-  let searchTextCondition = searchTextConditionFn(searchText);
+  const search = searchText ? parsers.regexQuery(searchText) : '';
+  let searchTextCondition = '';
+  const values = [];
+  if (searchText !== null && searchText !== undefined && searchText.length > 0) {
+    searchTextCondition = `
+      AND (title ~* ('\\m' || $1 || '\\M')
+        OR content ~* ('\\m' || $1 || '\\M')
+        OR country ~* ('\\m' || $1 || '\\M'))
+    `;
+
+    values.push(search);
+  }
 
   return {
     text: `
@@ -115,13 +118,24 @@ exports.articleGroup = (searchText, country, type) => {
         ${whereClause}
       GROUP BY article_type;
     `,
-    values: [],
+    values,
   };
 };
-
 exports.countryGroup = (searchText, country, type) => {
   let whereClause = theWhereClause(country, type);
-  let searchTextCondition = searchTextConditionFn(searchText);
+  const search = searchText ? parsers.regexQuery(searchText) : '';
+  let searchTextCondition = '';
+  const values = [];
+  if (searchText !== null && searchText !== undefined && searchText.length > 0) {
+    searchTextCondition = `
+      AND (title ~* ('\\m' || $1 || '\\M')
+        OR content ~* ('\\m' || $1 || '\\M')
+        OR all_html_content ~* ('\\m' || $1 || '\\M')
+        OR country ~* ('\\m' || $1 || '\\M'))
+    `;
+
+    values.push(search);
+  }
 
   return {
     text: `
@@ -132,13 +146,25 @@ exports.countryGroup = (searchText, country, type) => {
         ${whereClause}
       GROUP BY country, iso3;
     `,
-    values: [],
+    values,
   };
 };
 
   exports.statsQuery = (searchText, country, type) => {
     let whereClause = theWhereClause(country, type);
-    let searchTextCondition = searchTextConditionFn(searchText);
+    const search = searchText ? parsers.regexQuery(searchText) : '';
+    let searchTextCondition = '';
+    const values = [];
+    if (searchText !== null && searchText !== undefined && searchText.length > 0) {
+      searchTextCondition = `
+        AND (title ~* ('\\m' || $1 || '\\M')
+          OR content ~* ('\\m' || $1 || '\\M')
+          OR all_html_content ~* ('\\m' || $1 || '\\M')
+          OR country ~* ('\\m' || $1 || '\\M'))
+      `;
+
+      values.push(search);
+    }
 
     return {
       text: `
@@ -176,13 +202,25 @@ exports.countryGroup = (searchText, country, type) => {
           (SELECT COUNT(DISTINCT article_type) FROM total_article_type_count) AS distinct_article_type_count,
           (SELECT total_records FROM total_count) AS total_records;
       `,
-      values: [],
+      values,
     };
   };
 
   exports.extractGeoQuery = (searchText, country, type) => {
     let whereClause = theWhereClause(country, type);
-    let searchTextCondition = searchTextConditionFn(searchText);
+    const search = searchText ? parsers.regexQuery(searchText) : '';
+    let searchTextCondition = '';
+    const values = [];
+    if (searchText !== null && searchText !== undefined && searchText.length > 0) {
+      searchTextCondition = `
+        AND (title ~* ('\\m' || $1 || '\\M')
+          OR content ~* ('\\m' || $1 || '\\M')
+          OR all_html_content ~* ('\\m' || $1 || '\\M')
+          OR country ~* ('\\m' || $1 || '\\M'))
+      `;
+
+      values.push(search);
+    }
 
     return {
       text: `
@@ -210,8 +248,6 @@ exports.countryGroup = (searchText, country, type) => {
         GROUP BY clusters.cid
         ORDER BY clusters.cid;
       `,
-      values: [],
+      values,
     };
-  };
-
-
+  };
diff --git a/routes/browse/blog/searchBlogs.js b/routes/browse/blog/searchBlogs.js
@@ -1,16 +1,28 @@
 const { searchBlogQuery } = require('./query')
+const { parsers } = include('routes/helpers/')
 
 exports.main = async kwargs => {
 	const conn = kwargs.connection ? kwargs.connection : DB.conn
 	const { req, res, baseurl, page, page_content_limit } = kwargs || {}
 
     const searchText = req.query.search ||  '';
 	let { source, search, country, type } = req.query || {}
+	const searchRegex = searchText ? parsers.regexQuery(searchText) : '';
 
 	return conn.task(t => {
 			return t.any(searchBlogQuery(searchText, page, country, type)).then(async (results) => {
+				// console.log('searchRegex ', searchRegex)
+				// Process each row and collect results
+				const res = results.map((row) => {
+					const matchedTexts = extractContext(row.content, searchRegex);
+					delete row.content
+					delete row.all_html_content
+					// console.log('matchedTexts ', matchedTexts)
+					return { ...row, matched_texts: matchedTexts };
+				});
+
 				return {
-					searchResults : results,
+					searchResults : res,
 					page,
 					total_pages : results[0]?.total_pages || 0,
 					totalRecords : results[0]?.total_records || 0
@@ -25,4 +37,91 @@ exports.main = async kwargs => {
 			})
 
 	})
-}
+}
+
+/**
+ * Builds a plain-text snippet around the first match of searchTerm: up to 10 words before it and 100 words from it.
+ * @param {string} text - article text; anything that is not a non-empty string yields ''.
+ * @param {string} searchTerm - pattern source, converted to a RegExp by sanitizeSearchTerm.
+ * @returns {string} the snippet, or the first 100 words of text when nothing matches.
+ */
+function extractContext(text, searchTerm) {
+	if (typeof text !== 'string' || text.length === 0) return '';
+	const match = text.match(sanitizeSearchTerm(searchTerm));
+	const words = text.split(/\s+/);
+	let wordIndex = 0;
+	let charCount = 0;
+	// Walk the words until the running character offset reaches the start of the match.
+	while (match && wordIndex < words.length && charCount < match.index) {
+		charCount += words[wordIndex].length + 1; // +1 for the separating space
+		wordIndex++;
+	}
+
+	// The window must be derived AFTER the match has been located, otherwise it is always anchored at word 0.
+	const startIndex = Math.max(0, wordIndex - 10);
+	const endIndex = Math.min(words.length, wordIndex + 100);
+	return words.slice(startIndex, endIndex).join(' ');
+}
+
+/**
+ * Turns a '|'-separated search pattern into a case-insensitive JavaScript RegExp.
+ * PostgreSQL word anchors (\m, \M) become \b; every other backslash is dropped.
+ * NOTE(review): other regex metacharacters pass through untouched — assumes the caller already escaped them.
+ * @param {string} searchTerm - pattern source, e.g. the output of parsers.regexQuery.
+ * @returns {RegExp} pattern used to locate the first match inside article text.
+ */
+function sanitizeSearchTerm(searchTerm) {
+	const sanitizedSearchTerm = String(searchTerm ?? '')
+		.split('|')
+		.map(term => term
+			// Translate the anchors first, while their backslash is still there to tell
+			// them apart from the plain letters "m" / "M" inside a word.
+			.replace(/\\[mM]/g, '\\b')
+			// Only now strip the remaining backslashes (everything except \b).
+			.replace(/\\(?!b)/g, ''))
+		.join('|');
+
+	return new RegExp(sanitizedSearchTerm, 'i');
+}
+
+// Annotates each document with per-term match counts and wraps matched terms in highlight <span> markup.
+// Reads { embeddings, documents, filters, language } from _data; mutates each document in place and returns them in a new array.
+// Relies on Array.prototype.unique, which is not defined in this file — presumably a project-wide helper; verify.
+// NOTE(review): nothing visible in this file calls or exports `documents` — confirm it is still needed.
+ const documents = _data => {
+	const { embeddings, documents, filters, language } = _data
+
+	let b = '\\b' // WORD BOUNDARIES
+	let B = '\\B' // NOTE(review): B is assigned here and below but never read in this function
+	if (language === 'AR') {
+		b = '(^|[^\u0621-\u064A])'
+		B = '($|[^\u0621-\u064A])'
+	}
+
+	let terms = []
+	if (embeddings) terms = embeddings.flat().unique().filter(d => d.charAt(0) !== '-')
+	let filteredTerms = []
+	if (filters) filteredTerms = filters.flat().unique().filter(d => d.charAt(0) !== '-')
+
+	return documents.map(d => {
+		d.matches = []
+		terms.forEach(c => {
+			let match
+			if (c.indexOf('*') !== -1) match = d.text.match(new RegExp(`${b}(\#)?${c.trim()}(\\w+)?`, 'gi'))
+			else match = d.text.match(new RegExp(`${b}(\#)?${c.trim()}(\\S+)?`, 'gi'))
+			if (match) match.unique().forEach(b => d.text = d.text.replace(b, `<span class='highlight-term'>${b}</span>`)) // callback param b shadows the boundary string b
+			d.matches.push({ term: c, count: (match || []).length })
+		})
+		filteredTerms.forEach(c => {
+			let match
+			if (c.indexOf('*') !== -1) match = d.text.match(new RegExp(`${b}(\#)?${c.trim()}(\\w+)?`, 'gi'))
+			else match = d.text.match(new RegExp(`${b}(\#)?${c.trim()}(\\S+)?`, 'gi'))
+			if (match) match.forEach(b => d.text = d.text.replace(b, `<span class='highlight-filtered-term'>${b}</span>`))
+		})
+		if (d.tags) d.tags = d.tags.map(c => JSON.parse(c))
+		return d
+	})
+}
+
+
+
diff --git a/views/browse/blogs/index.ejs b/views/browse/blogs/index.ejs
@@ -152,31 +152,7 @@
 								  </div>
 								  <div class="media media-txt" style="margin-top: 20px; white-space: inherit !important;">
 									<a class="pad-link" target="_blank" style="width: 100% !important; ">
-										<%
-										  function findMatchingText(largeText) {
-
-											largeText.trim().replace(/^\n+|\n+$/g, '');
-
-											var searchText = locals?.metadata?.page?.query?.['search'] || ""
-
-											var escapedSearchText = searchText.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-											var pattern = new RegExp(`[^.!?]*${escapedSearchText}[^.!?]*[.!?]`, 'g');
-											var matches = largeText.match(pattern);
-
-											if (matches && matches.length > 0) {
-											  var firstSentence = matches[0];
-											  var secondSentence = matches[1];
-											  var lastSentence = matches[matches.length - 1];
-											  return firstSentence + " " + `${secondSentence || ''}`;
-											} else {
-											  return largeText?.split(' ')?.slice(0, 100)?.join(' ') || null;
-											}
-										  }
-
-										  var extractedText = findMatchingText(result?.content || result?.all_html_content || '');
-										%>
-
-										<%= extractedText || 'We are unable to extract content from the original website. Please click to read content on the original website.' %> ... <a target="_blank" href="<%= result.url %>" style="cursor: pointer; color: cornflowerblue;">read more</a>
+										<%= result?.matched_texts || 'We are unable to extract content from the original website. Please click to read content on the original website.' %> ... <a target="_blank" href="<%= result.url %>" style="cursor: pointer; color: cornflowerblue;">read more</a>
 									  </a>
 								  </div>
 								  <div class="meta tag-group">