diff --git a/package.json b/package.json
index f250a13..f016073 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "cei-crawler",
-  "version": "2.0.1",
+  "version": "2.0.2",
   "description": "Crawler para pegar dados do Canal Eletronico do Investidor",
   "main": "src/app.js",
   "repository": {
diff --git a/src/lib/DividendsCrawler.js b/src/lib/DividendsCrawler.js
index 95741c8..279cea7 100644
--- a/src/lib/DividendsCrawler.js
+++ b/src/lib/DividendsCrawler.js
@@ -151,7 +151,6 @@ class DividendsCrawler {
                 })).get()
                 .filter(institution => institution.value > 0);
 
-
             // Iterate over institutions, accounts, processing the stocks
             for (const institution of institutions) {
 
@@ -187,34 +186,8 @@ class DividendsCrawler {
                         console.log(`Selecting account ${account}`);
 
                     domPage(PAGE.SELECT_ACCOUNT).attr('value', account);
-
 
-                    const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.DIVIDENDS_ACCOUNT, {
-                        ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
-                        __EVENTARGUMENT: '',
-                        __LASTFOCUS: ''
-                    });
-
-                    const historyRequest = await cookieManager.fetch(PAGE.URL, {
-                        ...FETCH_OPTIONS.DIVIDENDS_ACCOUNT,
-                        body: formDataHistory
-                    });
-                    const dividendsText = normalizeWhitespace(await historyRequest.text());
-                    const errorMessage = CeiUtils.extractMessagePostResponse(dividendsText);
-
-                    if (errorMessage && errorMessage.type === 2) {
-                        throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
-                    }
-
-                    const dividendsDOM = cheerio.load(dividendsText);
-
-                    // Process the page
-                    /* istanbul ignore next */
-                    if (traceOperations)
-                        console.log(`Processing dividends data`);
-
-                    const futureEvents = this._processEvents(dividendsDOM, PAGE.FUTURE_EVENTS_TITLE);
-                    const pastEvents = this._processEvents(dividendsDOM, PAGE.PAST_EVENTS_TITLE);
+                    const { futureEvents, pastEvents } = await this._getDataPage(domPage, cookieManager, traceOperations);
 
                     // Save the result
                     result.push({
@@ -278,6 +251,59 @@ class DividendsCrawler {
         }
     }
 
+    /**
+     * Posts the account query form and parses the dividends page, re-posting with
+     * the refreshed form fields until the response carries a message or events.
+     * @param {cheerio.Root} dom DOM of page
+     * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with
+     * @param {Boolean} traceOperations - Whether to trace operations or not
+     * @returns {Promise<Object>} Object with the parsed futureEvents and pastEvents
+     * @throws {CeiCrawlerError} SUBMIT_ERROR when the response message has type 2
+     */
+    static async _getDataPage(dom, cookieManager, traceOperations) {
+        while(true) {
+            const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.DIVIDENDS_ACCOUNT, {
+                ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar',
+                __EVENTARGUMENT: '',
+                __LASTFOCUS: ''
+            });
+
+            const dividendsRequest = await cookieManager.fetch(PAGE.URL, {
+                ...FETCH_OPTIONS.DIVIDENDS_ACCOUNT,
+                body: formDataHistory
+            });
+
+            const dividendsText = normalizeWhitespace(await dividendsRequest.text());
+            const errorMessage = CeiUtils.extractMessagePostResponse(dividendsText);
+
+            if (errorMessage && errorMessage.type === 2) {
+                throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message);
+            }
+
+            const dividendsDOM = cheerio.load(dividendsText);
+
+            // Process the page
+            /* istanbul ignore next */
+            if (traceOperations)
+                console.log(`Processing dividends data`);
+
+            const futureEvents = this._processEvents(dividendsDOM, PAGE.FUTURE_EVENTS_TITLE);
+            const pastEvents = this._processEvents(dividendsDOM, PAGE.PAST_EVENTS_TITLE);
+
+            // Done once the response carries a typed message or any event was parsed.
+            // errorMessage is guarded the same way as in the type-2 check above.
+            if ((errorMessage && errorMessage.type !== undefined) || futureEvents.length > 0 || pastEvents.length > 0) {
+                return {
+                    futureEvents,
+                    pastEvents
+                };
+            }
+
+            // Otherwise apply the update form extracted from the response to the DOM and retry
+            const updtForm = CeiUtils.extractUpdateForm(dividendsText);
+            CeiUtils.updateFieldsDOM(dom, updtForm);
+        }
+    }
 
     /**
      * Process the events given the parameters
diff --git a/src/lib/StockHistoryCrawler.js b/src/lib/StockHistoryCrawler.js
index 271eefe..35493fb 100644
--- a/src/lib/StockHistoryCrawler.js
+++ b/src/lib/StockHistoryCrawler.js
@@ -209,31 +209,8 @@ class StockHistoryCrawler {
console.log(`Selecting account ${account}`); domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - - const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.STOCK_HISTORY_ACCOUNT, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '' - }); - - const historyRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.STOCK_HISTORY_ACCOUNT, - body: formDataHistory - }); - const historyText = normalizeWhitespace(await historyRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(historyText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const historyDOM = cheerio.load(historyText); - - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing stock history data`); - - const stockHistory = this._processStockHistory(historyDOM); + const stockHistory = await this._getDataPage(domPage, cookieManager, traceOperations); /* istanbul ignore next */ if (traceOperations) { @@ -300,6 +277,48 @@ class StockHistoryCrawler { } } + /** + * Returns the data from the page after trying more than once + * @param {cheerio.Root} dom DOM of page + * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with + * @param {Boolean} traceOperations - Whether to trace operations or not + */ + static async _getDataPage(dom, cookieManager, traceOperations) { + while(true) { + const formDataHistory = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.STOCK_HISTORY_ACCOUNT, { + ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', + __EVENTARGUMENT: '' + }); + + const historyRequest = await cookieManager.fetch(PAGE.URL, { + ...FETCH_OPTIONS.STOCK_HISTORY_ACCOUNT, + body: formDataHistory + }); + + const historyText = normalizeWhitespace(await 
historyRequest.text()); + const errorMessage = CeiUtils.extractMessagePostResponse(historyText); + + if (errorMessage && errorMessage.type === 2) { + throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); + } + + const historyDOM = cheerio.load(historyText); + + /* istanbul ignore next */ + if (traceOperations) + console.log(`Processing stock history data`); + + const stockHistory = this._processStockHistory(historyDOM); + + if (errorMessage.type !== undefined || stockHistory.length > 0) { + return stockHistory; + } + + const updtForm = CeiUtils.extractUpdateForm(historyText); + CeiUtils.updateFieldsDOM(dom, updtForm); + } + } + /** * Process the stock history to a DTO * @param {cheerio.Root} dom DOM table stock history diff --git a/src/lib/WalletCrawler.js b/src/lib/WalletCrawler.js index 570364b..511ce5d 100644 --- a/src/lib/WalletCrawler.js +++ b/src/lib/WalletCrawler.js @@ -201,33 +201,7 @@ class WalletCrawler { domPage(PAGE.SELECT_ACCOUNT).attr('value', account); - const formDataHistory = CeiUtils.extractFormDataFromDOM(domPage, FETCH_FORMS.WALLET_ACCOUNT, { - ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', - __EVENTARGUMENT: '', - __LASTFOCUS: '' - }); - - const historyRequest = await cookieManager.fetch(PAGE.URL, { - ...FETCH_OPTIONS.WALLET_ACCOUNT, - body: formDataHistory - }); - - const walletText = normalizeWhitespace(await historyRequest.text()); - const errorMessage = CeiUtils.extractMessagePostResponse(walletText); - - if (errorMessage && errorMessage.type === 2) { - throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); - } - - const walletDOM = cheerio.load(walletText); - - // Process the page - /* istanbul ignore next */ - if (traceOperations) - console.log(`Processing wallet data`); - - const stockWallet = this._processStockWallet(walletDOM); - const nationalTreasuryWallet = this._processNationalTreasuryWallet(walletDOM); + 
const { stockWallet, nationalTreasuryWallet } = await this._getDataPage(domPage, cookieManager, traceOperations); // Save the result result.push({ @@ -291,6 +265,54 @@ class WalletCrawler { } } + /** + * Returns the data from the page after trying more than once + * @param {cheerio.Root} dom DOM of page + * @param {FetchCookieManager} cookieManager - FetchCookieManager to work with + * @param {Boolean} traceOperations - Whether to trace operations or not + */ + static async _getDataPage(dom, cookieManager, traceOperations) { + while(true) { + const formDataWallet = CeiUtils.extractFormDataFromDOM(dom, FETCH_FORMS.WALLET_ACCOUNT, { + ctl00$ContentPlaceHolder1$ToolkitScriptManager1: 'ctl00$ContentPlaceHolder1$updFiltro|ctl00$ContentPlaceHolder1$btnConsultar', + __EVENTARGUMENT: '', + __LASTFOCUS: '' + }); + + const walletRequest = await cookieManager.fetch(PAGE.URL, { + ...FETCH_OPTIONS.WALLET_ACCOUNT, + body: formDataWallet + }); + + const walletText = normalizeWhitespace(await walletRequest.text()); + const errorMessage = CeiUtils.extractMessagePostResponse(walletText); + + if (errorMessage && errorMessage.type === 2) { + throw new CeiCrawlerError(CeiErrorTypes.SUBMIT_ERROR, errorMessage.message); + } + + const walletDOM = cheerio.load(walletText); + + // Process the page + /* istanbul ignore next */ + if (traceOperations) + console.log(`Processing wallet data`); + + const stockWallet = this._processStockWallet(walletDOM); + const nationalTreasuryWallet = this._processNationalTreasuryWallet(walletDOM); + + if (errorMessage.type !== undefined || stockWallet.length > 0 || nationalTreasuryWallet.length > 0) { + return { + stockWallet, + nationalTreasuryWallet + }; + } + + const updtForm = CeiUtils.extractUpdateForm(historyText); + CeiUtils.updateFieldsDOM(dom, updtForm); + } + } + /** * Process the stock wallet to a DTO * @param {cheerio.Root} dom DOM table stock history