From d9ea5c50f3ad0f887815a023f0194ae804b44532 Mon Sep 17 00:00:00 2001 From: Karen Coombs Date: Fri, 25 Aug 2023 20:01:41 -0500 Subject: [PATCH 1/2] updated newsbank --- nb/test/nb.2023-08-25.csv | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 nb/test/nb.2023-08-25.csv diff --git a/nb/test/nb.2023-08-25.csv b/nb/test/nb.2023-08-25.csv new file mode 100644 index 000000000..e69de29bb From bfd53ff5fc674696129acd5b520bf87d01659e4b Mon Sep 17 00:00:00 2001 From: Karen Coombs Date: Wed, 30 Aug 2023 08:09:05 -0500 Subject: [PATCH 2/2] updated newsbank --- nb/parser.js | 8 +++++++- nb/test/nb.2020-10-15.csv | 18 +++++++++--------- nb/test/nb.2023-08-25.csv | 11 +++++++++++ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/nb/parser.js b/nb/parser.js index 83d742c3c..196b7727e 100755 --- a/nb/parser.js +++ b/nb/parser.js @@ -26,17 +26,21 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // https://infoweb.newsbank.com/apps/news/results?p=NewsBank&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=H1N1&t= result.rtype = 'SEARCH'; result.mime = 'HTML'; + result.pii = param.p; + result.title_id = param.t.split('!')[1]; } else if ((match = /^\/resources\/search\/nb$/i.exec(path)) !== null) { // https://infoweb.newsbank.com/resources/search/nb?p=OBIT&t=state%3AIL%21USA%2B-%2BIllinois // https://infoweb.newsbank.com/resources/search/nb?p=OBIT&b=results&t=state%3AIL%21USA%2B-%2BIllinois&fld0=dece&val0=Duffy&bln1=AND&fld1=YMD_date&val1=&bln2=AND&fld2=doc_body&val2=&sort=YMD_date%3AD&page=0 result.rtype = 'SEARCH'; result.mime = 'MISC'; + result.pii = param.p; } else if ((match = /^\/resources\/doc\/nb\/obit\/([a-z0-9-]+)$/i.exec(path)) !== null) { // https://infoweb.newsbank.com/resources/doc/nb/obit/175A318630BE6C90-175A318630BE6C90?p=OBIT result.rtype = 'RECORD'; result.mime = 'HTML'; result.unitid = match[1]; + result.pii = param.p; } else if ((match = /^\/iw-search\/we\/Static$/i.exec(path)) !== null) { // https://infoweb.newsbank.com/iw-search/we/Static?p_product=Space&f_location=space&p_theme=current&p_action=doc&p_nbid=J50L52JHMTYwMjE4MjIzMC42MDYyNzg6MToxNDoxMzIuMTc0LjI1MC45NQ&f_docnum=17DF632D56E33508&f_topic=1&f_prod=BRFB&f_type=&d_refprod=SPECIALREPORTS result.rtype = 'ARTICLE'; @@ -47,7 +51,9 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) { // https://infoweb.newsbank.com/apps/news/document-view?p=AMNEWS&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=Ginther&docref=news/17DED41B89F9D928 result.rtype = 'ARTICLE'; result.mime = 'HTML'; - result.unitid = param.docref.replace('news/', ''); + result.unitid = param.docref.split('/')[1]; + result.pii = param.p; + result.title_id = param.t.split('!')[1]; } return result; diff --git a/nb/test/nb.2020-10-15.csv b/nb/test/nb.2020-10-15.csv index fa55222e6..564d06645 100644 --- a/nb/test/nb.2020-10-15.csv +++ b/nb/test/nb.2020-10-15.csv @@ -1,9 +1,9 @@ -out-rtype;out-mime;out-login;out-unitid;in-url -SEARCH;HTML;;;https://infoweb.newsbank.com/apps/news/results?p=AMNEWS&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=Ginther&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current -SEARCH;HTML;;;https://infoweb.newsbank.com/apps/news/results?p=NewsBank&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=H1N1&t= -SEARCH;MISC;;;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&t=state%3AIL%21USA%2B-%2BIllinois -SEARCH;MISC;;;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&b=results&t=state%3AIL%21USA%2B-%2BIllinois&fld0=dece&val0=Duffy&bln1=AND&fld1=YMD_date&val1=&bln2=AND&fld2=doc_body&val2=&sort=YMD_date%3AD&page=0 -RECORD;HTML;;175A318630BE6C90-175A318630BE6C90;https://infoweb.newsbank.com/resources/doc/nb/obit/175A318630BE6C90-175A318630BE6C90?p=OBIT -ARTICLE;HTML;;17DF632D56E33508;https://infoweb.newsbank.com/iw-search/we/Static?p_product=Space&f_location=space&p_theme=current&p_action=doc&p_nbid=J50L52JHMTYwMjE4MjIzMC42MDYyNzg6MToxNDoxMzIuMTc0LjI1MC45NQ&f_docnum=17DF632D56E33508&f_topic=1&f_prod=BRFB&f_type=&d_refprod=SPECIALREPORTS -ARTICLE;HTML;;17DFAD1BF786C478;https://infoweb.newsbank.com/apps/news/document-view?p=NewsBank&t=&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=H1N1&docref=news/17DFAD1BF786C478 -ARTICLE;HTML;;17DED41B89F9D928;https://infoweb.newsbank.com/apps/news/document-view?p=AMNEWS&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=Ginther&docref=news/17DED41B89F9D928 \ No newline at end of file +out-title_id;out-pii;out-rtype;out-mime;out-login;out-unitid;in-url +Columbus Dispatch Historical and Current;AMNEWS;SEARCH;HTML;;;https://infoweb.newsbank.com/apps/news/results?p=AMNEWS&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=Ginther&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current +;NewsBank;SEARCH;HTML;;;https://infoweb.newsbank.com/apps/news/results?p=NewsBank&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=H1N1&t= +;OBIT;SEARCH;MISC;;;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&t=state%3AIL%21USA%2B-%2BIllinois +;OBIT;SEARCH;MISC;;;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&b=results&t=state%3AIL%21USA%2B-%2BIllinois&fld0=dece&val0=Duffy&bln1=AND&fld1=YMD_date&val1=&bln2=AND&fld2=doc_body&val2=&sort=YMD_date%3AD&page=0 +;OBIT;RECORD;HTML;;175A318630BE6C90-175A318630BE6C90;https://infoweb.newsbank.com/resources/doc/nb/obit/175A318630BE6C90-175A318630BE6C90?p=OBIT +;;ARTICLE;HTML;;17DF632D56E33508;https://infoweb.newsbank.com/iw-search/we/Static?p_product=Space&f_location=space&p_theme=current&p_action=doc&p_nbid=J50L52JHMTYwMjE4MjIzMC42MDYyNzg6MToxNDoxMzIuMTc0LjI1MC45NQ&f_docnum=17DF632D56E33508&f_topic=1&f_prod=BRFB&f_type=&d_refprod=SPECIALREPORTS +;NewsBank;ARTICLE;HTML;;17DFAD1BF786C478;https://infoweb.newsbank.com/apps/news/document-view?p=NewsBank&t=&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=H1N1&docref=news/17DFAD1BF786C478 +Columbus Dispatch Historical and Current;AMNEWS;ARTICLE;HTML;;17DED41B89F9D928;https://infoweb.newsbank.com/apps/news/document-view?p=AMNEWS&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=Ginther&docref=news/17DED41B89F9D928 \ No newline at end of file diff --git a/nb/test/nb.2023-08-25.csv b/nb/test/nb.2023-08-25.csv index e69de29bb..c1724efae 100644 --- a/nb/test/nb.2023-08-25.csv +++ b/nb/test/nb.2023-08-25.csv @@ -0,0 +1,11 @@ +out-title_id;out-pii;out-unitid;out-rtype;out-mime;in-url +Columbus Dispatch Historical and Current;AMNEWS;17DED41B89F9D928;ARTICLE;HTML;https://infoweb.newsbank.com/apps/news/document-view?p=AMNEWS&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=Ginther&docref=news/17DED41B89F9D928 +;NewsBank;17DFAD1BF786C478;ARTICLE;HTML;https://infoweb.newsbank.com/apps/news/document-view?p=NewsBank&t=&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=H1N1&docref=news/17DFAD1BF786C478 +;;17DF632D56E33508;ARTICLE;HTML;https://infoweb.newsbank.com/iw-search/we/Static?p_product=Space&f_location=space&p_theme=current&p_action=doc&p_nbid=J50L52JHMTYwMjE4MjIzMC42MDYyNzg6MToxNDoxMzIuMTc0LjI1MC45NQ&f_docnum=17DF632D56E33508&f_topic=1&f_prod=BRFB&f_type=&d_refprod=SPECIALREPORTS +;OBIT;175A318630BE6C90-175A318630BE6C90;RECORD;HTML;https://infoweb.newsbank.com/resources/doc/nb/obit/175A318630BE6C90-175A318630BE6C90?p=OBIT +;OBIT;;SEARCH;MISC;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&b=results&t=state%3AIL%21USA%2B-%2BIllinois&fld0=dece&val0=Duffy&bln1=AND&fld1=YMD_date&val1=&bln2=AND&fld2=doc_body&val2=&sort=YMD_date%3AD&page=0 +;OBIT;;SEARCH;MISC;https://infoweb.newsbank.com/resources/search/nb?p=OBIT&t=state%3AIL%21USA%2B-%2BIllinois +;NewsBank;;SEARCH;HTML;https://infoweb.newsbank.com/apps/news/results?p=NewsBank&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=H1N1&t= +Columbus Dispatch Historical and Current;AMNEWS;;SEARCH;HTML;https://infoweb.newsbank.com/apps/news/results?p=AMNEWS&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=Ginther&t=favorite%3A1467499E%21Columbus%2520Dispatch%2520Historical%2520and%2520Current +Oregonian;AMNEWS;v2:11A73E5827618330@EANX-NB-132AEA35E5DCD707@2447161-132AE92168CBD33E@4-132AE92168CBD33E@;ARTICLE;HTML;https://infoweb-newsbank-com/apps/news/document-view?p=AMNEWS&t=pubname%3A11A73E5827618330%21Oregonian&sort=YMD_date%3AD&fld-base-0=alltext&maxresults=20&val-base-0=portland&docref=image/v2%3A11A73E5827618330%40EANX-NB-132AEA35E5DCD707%402447161-132AE92168CBD33E%404-132AE92168CBD33E%40 +Oregonian;AMNEWS;;SEARCH;HTML;https://infoweb-newsbank.com/apps/news/results?p=AMNEWS&fld-base-0=alltext&sort=YMD_date%3AD&maxresults=20&val-base-0=portland&t=pubname%3A11A73E5827618330%21Oregonian \ No newline at end of file