Skip to content

Commit

Permalink
update parser emis
Browse files Browse the repository at this point in the history
  • Loading branch information
chaoyinYang committed Oct 11, 2024
1 parent ed00672 commit 8ad7541
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
16 changes: 16 additions & 0 deletions emis/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
let path = parsedUrl.pathname;
// uncomment this line if you need parameters
let param = parsedUrl.query || {};
let match;

// use console.error for debugging
// console.error(parsedUrl);
Expand All @@ -33,11 +34,21 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
result.rtype = 'REPORT';
result.mime = 'HTML';
result.unitid = param.doc_id;
} else if ((match = /^\/v2\/documents\/report\/([0-9]+)$/i.exec(path)) != null) {
// https://www.emis.com/v2/documents/report/834568862?keyword=anduril
result.rtype = 'REPORT';
result.mime = 'HTML';
result.unitid = match[1];
} else if (/^\/php\/companies\/index$/i.test(path)) {
// https://www.emis.com/php/companies/index?pc=HK&cmpy=9737982
result.rtype = 'RECORD';
result.mime = 'HTML';
result.unitid = param.cmpy;
} else if ((match = /^\/v2\/companies\/profile\/[a-zA-Z0-9]+\/([0-9]+)$/i.exec(path)) != null) {
// https://www.emis.com/v2/companies/profile/US/14330970
result.rtype = 'RECORD';
result.mime = 'HTML';
result.unitid = match[1];
} else if (/^\/php\/companies\/index\/keystatsbox$/i.test(path) && param.excel === '1') {
// https://www.emis.com/php/companies/index/keystatsbox?pc=HK&cmpy=9737982&hideValues=&currency=HKD&display_units=3&excel=1&tbl=keystats-page-table-exchange
result.rtype = 'DATASET';
Expand All @@ -47,6 +58,11 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
// https://www.emis.com/php/search/searchv2
result.rtype = 'SEARCH';
result.mime = 'HTML';
} else if ((match = /^\/v2\/documents\/([0-9]+)$/i.exec(path)) != null) {
// https://www.emis.com/v2/documents/837338451
result.rtype = 'ARTICLE';
result.mime = 'HTML';
result.unitid = match[1];
}

return result;
Expand Down
13 changes: 8 additions & 5 deletions emis/test/emis.2023-04-17.csv → emis/test/emis.2024-10-11.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
out-unitid;out-rtype;out-mime;in-url
;SEARCH;HTML;https://www.emis.com/php/search/searchv2
766831747;REPORT;PDF;https://www.emis.com/php/search/docpdf?doc_id=766831747
9737982;RECORD;HTML;https://www.emis.com/php/companies/index?pc=HK&cmpy=9737982
9737982;DATASET;XLS;https://www.emis.com/php/companies/index/keystatsbox?pc=HK&cmpy=9737982&hideValues=&currency=HKD&display_units=3&excel=1&tbl=keystats-page-table-exchange
837338451;ARTICLE;HTML;https://www.emis.com/v2/documents/837338451
834568862;REPORT;HTML;https://www.emis.com/v2/documents/report/834568862?keyword=anduril
14330970;RECORD;HTML;https://www.emis.com/v2/companies/profile/US/14330970
719516695;REPORT;HTML;https://www.emis.com/php/search/pdf2html?pc=BR&doc_id=719516695&type=1
719516695;REPORT;PDF;https://www.emis.com/php/search/docpdf?pc=BR&sv=EMIS&doc_id=719516695
719516695;REPORT;HTML;https://www.emis.com/php/search/pdf2html?pc=BR&doc_id=719516695&type=1
9737982;DATASET;XLS;https://www.emis.com/php/companies/index/keystatsbox?pc=HK&cmpy=9737982&hideValues=&currency=HKD&display_units=3&excel=1&tbl=keystats-page-table-exchange
9737982;RECORD;HTML;https://www.emis.com/php/companies/index?pc=HK&cmpy=9737982
766831747;REPORT;PDF;https://www.emis.com/php/search/docpdf?doc_id=766831747
;SEARCH;HTML;https://www.emis.com/php/search/searchv2

0 comments on commit 8ad7541

Please sign in to comment.