Skip to content

Commit

Permalink
Merge pull request #297 from 0xc0d3c5/execute-scrape-tests-within-nodejs
Browse files Browse the repository at this point in the history
Execute scrape.test.js within a Node.js environment
  • Loading branch information
Yomguithereal authored Nov 6, 2020
2 parents 15bc259 + 7f2997f commit 684667a
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 13 deletions.
12 changes: 6 additions & 6 deletions src/methods/artoo.methods.scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,11 @@

params = params || {};

var sel = typeof root !== 'string' ? root.selector : root,
var sel = root,
headers;

if (!params.headers) {
return artoo.scrape(sel + ' tr:has(td)', {
return artoo.scrape($(sel).find('tr:has(td)'), {
scrape: {
iterator: 'td',
data: params.data || 'text'
Expand All @@ -182,12 +182,12 @@

if (headerType === 'th') {
headers = artoo.scrape(
sel + ' th', headerFn || 'text'
$(sel).find('th'), headerFn || 'text'
);
}
else if (headerType === 'first') {
headers = artoo.scrape(
sel + ' tr:has(td):first-of-type td',
$(sel).find(' tr:has(td):first-of-type td'),
headerFn || 'text'
);
}
Expand All @@ -200,8 +200,8 @@

// Scraping
return artoo.scrape(
sel + ' tr:has(td)' +
(headerType === 'first' ? ':not(:first-of-type)' : ''), function() {
$(sel).find('tr:has(td)' +
(headerType === 'first' ? ':not(:first-of-type)' : '')), function() {
var o = {};

headers.forEach(function(h, i) {
Expand Down
1 change: 1 addition & 0 deletions test/endpoint.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
*/

require('./suites/node.test.js');
require('./suites/node.scrape.test.js');
30 changes: 30 additions & 0 deletions test/suites/node.scrape.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
* artoo scrape method node unit tests
* ===================================
*
*/
var cheerio = require('cheerio'),
artoo = require('../../build/artoo.node.js');

/**
 * Synchronously read a file from disk and hand back its text.
 *
 * @param  {string} path - Location of the file to read.
 * @return {string}      - The file's contents decoded as UTF-8.
 */
function readFile(path) {
  var contents = fs.readFileSync(path, {encoding: 'utf-8'});

  return contents;
}

// Monkey patch - begin
// The names declared here live at module level; presumably they are consumed
// by the shared scrape suite evaluated at the end of this file (verify).
var assert = require('assert');
var fs = require('fs');
var $;

// NOTE: `helpers` is assigned without `var`, so it is created as an implicit
// global rather than a module-local binding.
helpers = {

  /**
   * Node.js compliant fetchHTMLResource.
   *
   * Builds a cheerio document holding a single `<div>` whose id is `name`,
   * registers it as artoo's context, appends the content of
   * `../resources/<name>.html` into that div, exposes the document through
   * the module-level `$`, and finally calls `cb` with the `#<name>` selector.
   *
   * @param {string}   name - Resource basename, also used as the wrapper id.
   * @param {function} cb   - Invoked with the selector of the wrapper div.
   */
  fetchHTMLResource: function(name, cb) {
    var selector = '#' + name,
        resourcePath = __dirname + '/../resources/' + name + '.html',
        $context = cheerio.load('<div id="' + name + '"></div>');

    // The context is registered before the resource file is read and appended.
    artoo.setContext($context);
    $context(selector).append(readFile(resourcePath));

    $ = $context;
    cb(selector);
  }
};
// Monkey patch - end

// Load the source of the sibling `scrape.test.js` suite and evaluate it here.
// This is a direct `eval`, so the evaluated code runs in this module's scope.
// NOTE(review): presumably done so the suite picks up the `$`, `assert`,
// `artoo` and `helpers` names set up above — confirm against scrape.test.js.
// The evaluated text is a local test file, not external input.
eval(readFile(__dirname + '/scrape.test.js'))
14 changes: 7 additions & 7 deletions test/suites/scrape.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
assert.deepEqual(
artoo.scrape(
function($) {
return $(id + ' li > a:first').add(id + ' li > a:last');
return $(id + ' li:first-of-type > a').add(id + ' li:last-of-type > a');
}
),
[titleList[0], titleList[3]],
Expand Down Expand Up @@ -365,7 +365,7 @@
assert.deepEqual(
artoo.scrapeOne(id + ' > ul', {
title: {
sel: 'li:first > span'
sel: 'li:first-of-type > span'
},
items: {
sel: 'li:nth-child(2)',
Expand Down Expand Up @@ -403,7 +403,7 @@
});

assert.deepEqual(
artoo.scrape(id + ' .reference tr:not(:first)', {
artoo.scrape(id + ' .reference tr:not(:first-of-type)', {
scrape: {
iterator: 'td',
data: 'text'
Expand All @@ -426,10 +426,10 @@
);

assert.deepEqual(
artoo.scrape(id + ' .reference tr:not(:first)', {
firstname: {sel: 'td:first'},
lastname: {sel: 'td:eq(1)'},
points: {sel: 'td:eq(2)'}
artoo.scrape(id + ' .reference tr:not(:first-of-type)', {
firstname: {sel: 'td:first-of-type'},
lastname: {sel: 'td:nth-of-type(2)'},
points: {sel: 'td:nth-of-type(3)'}
}),
objects,
'Scraping the list more easily should return the correct array of objects'
Expand Down

0 comments on commit 684667a

Please sign in to comment.