From 639a77db9bfc304d7d442a961a8d9fc4ec79d20d Mon Sep 17 00:00:00 2001
From: Leslie-Wong-H <79917148leslie@gmail.com>
Date: Thu, 29 Jun 2023 10:44:49 +0800
Subject: [PATCH 1/4] Parallel JSON data polish operation

---
Review notes (text between "---" and the diffstat is ignored by `git am`):
* lodash is pulled in through its default export, the same way this file
  already imports jsbi-calculator, instead of a named `chunk` import; lodash
  ships as a CommonJS bundle and loader.js is an ES module.
* The per-chunk results are merged with the built-in Array.prototype.flat();
  the earlier revision called `flatten` without importing or defining it in
  the lines shown.
* `modifier` is a plain synchronous function, so Promise.all only collects
  its return values here; the chunks are still processed one after another.
* NOTE(review): leading whitespace of the loader.js lines was reconstructed
  from a whitespace-collapsed copy, and the blob ids on the "index" lines
  predate these edits. Apply with `git am --ignore-whitespace` and confirm
  against the working tree.

 loader.js    | 39 ++++++++++++++++++++++++++++++---------
 package.json |  1 +
 yarn.lock    |  2 +-
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/loader.js b/loader.js
index b0f4fc9..bc4c74d 100644
--- a/loader.js
+++ b/loader.js
@@ -5,6 +5,7 @@ import lzma from "lzma-native";
 import fetch from "node-fetch";
 import { MilvusClient } from "@zilliz/milvus2-sdk-node";
 import cron from "node-cron";
+import lodash from 'lodash';
 import JBC from "jsbi-calculator";
 
 const { calculator, BigDecimal } = JBC;
@@ -174,16 +175,36 @@ const messageHandle = async (data) => {
 
     try {
       console.log(`Polish JSON data`);
-      let jsonData = new Array(dedupedHashList.length).fill(null);
-      for (let i = 0; i < dedupedHashList.length; i++) {
-        const doc = dedupedHashList[i];
-        jsonData[i] = {
-          id: `${file}/${doc.time.toFixed(2)}`,
-          // cl_hi: doc.cl_hi, // reduce index size
-          cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1),
-          primary_key: getPrimaryKey(doc.cl_hi),
-        };
+      // let jsonData = new Array(dedupedHashList.length).fill(null);
+      // for (let i = 0; i < dedupedHashList.length; i++) {
+      //   const doc = dedupedHashList[i];
+      //   jsonData[i] = {
+      //     id: `${file}/${doc.time.toFixed(2)}`,
+      //     // cl_hi: doc.cl_hi, // reduce index size
+      //     cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1),
+      //     primary_key: getPrimaryKey(doc.cl_hi),
+      //   };
+      // }
+
+      // Parallel operation with 1000 as one unit
+      let chunkedJsonData = lodash.chunk(new Array(dedupedHashList.length).fill(null), 1000);
+      let chunkedDedupedHashList = lodash.chunk(dedupedHashList, 1000); // [1,...,2000] => [[1,...,1000],[1001,...,200]]
+      const modifier = (dedupedHashList, jsonData) => {
+        for (let i = 0; i < dedupedHashList.length; i++) {
+          const doc = dedupedHashList[i];
+          jsonData[i] = {
+            id: `${file}/${doc.time.toFixed(2)}`,
+            // cl_hi: doc.cl_hi, // reduce index size
+            cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1),
+            primary_key: getPrimaryKey(doc.cl_hi),
+          };
+        }
+        return jsonData;
       }
+      const segments = await Promise.all(chunkedDedupedHashList.map((each, index) => {
+        return modifier(each, chunkedJsonData[index])
+      }));
+      const jsonData = segments.flat();
 
       // Pause for 5 seconds to make node arrange the compute resource.
       console.log("Pause for 5 seconds");
diff --git a/package.json b/package.json
index 5ad0cae..addda21 100644
--- a/package.json
+++ b/package.json
@@ -35,6 +35,7 @@
     "express-rate-limit": "^6.6.0",
     "fs-extra": "^11.1.1",
     "jsbi-calculator": "^0.3.5",
+    "lodash": "^4.17.21",
     "lzma-native": "^8.0.6",
     "node-cron": "^3.0.2",
     "node-fetch": "^3.2.10",
diff --git a/yarn.lock b/yarn.lock
index b1c899c..26ed06e 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -738,7 +738,7 @@ lodash.isequal@^4.5.0:
   resolved "https://registry.yarnpkg.com/lodash.isequal/-/lodash.isequal-4.5.0.tgz#415c4478f2bcc30120c22ce10ed3226f7d3e18e0"
   integrity sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==
 
-lodash@~4.17.15:
+lodash@^4.17.21, lodash@~4.17.15:
   version "4.17.21"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==

From 0fd35d621e6197a73d12c6f7c5a72c0db55c68a2 Mon Sep 17 00:00:00 2001
From: Leslie-Wong-H <79917148leslie@gmail.com>
Date: Sat, 1 Jul 2023 14:05:46 +0800
Subject: [PATCH 2/4] Chore: correct comment

---
 loader.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loader.js b/loader.js
index bc4c74d..e2cc451 100644
--- a/loader.js
+++ b/loader.js
@@ -188,7 +188,7 @@ const messageHandle = async (data) => {
 
       // Parallel operation with 1000 as one unit
       let chunkedJsonData = lodash.chunk(new Array(dedupedHashList.length).fill(null), 1000);
-      let chunkedDedupedHashList = lodash.chunk(dedupedHashList, 1000); // [1,...,2000] => [[1,...,1000],[1001,...,200]]
+      let chunkedDedupedHashList = lodash.chunk(dedupedHashList, 1000); // [1,...,2000] => [[1,...,1000],[1001,...,2000]]
       const modifier = (dedupedHashList, jsonData) => {
         for (let i = 0; i < dedupedHashList.length; i++) {
           const doc = dedupedHashList[i];

From d60b3c2e08c837c0354b5be21067f48688bd74bc Mon Sep 17 00:00:00 2001
From: Leslie-Wong-H <79917148leslie@gmail.com>
Date: Sat, 1 Jul 2023 14:21:31 +0800
Subject: [PATCH 3/4] Build: release shotit-worker 0.9.4

---
 README.md    | 8 ++++----
 package.json | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 2927286..0732b6c 100644
--- a/README.md
+++ b/README.md
@@ -4,13 +4,13 @@
 [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/shotit/shotit-worker/docker-image.yml?branch=main&style=flat-square)](https://github.com/shotit/shotit-worker/actions)
 [![GitHub release](https://img.shields.io/github/release/shotit/shotit-worker.svg)](https://github.com/shotit/shotit-worker/releases/latest)
 [![Watcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-watcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
-[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.3?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
+[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.4?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
 [![Hasher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-hasher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
-[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.3?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
+[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.4?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
 [![Loader Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-loader?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
-[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.3?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
+[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.4?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
 [![Searcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-searcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)
-[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.3?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)
+[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.4?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)
 
 Backend workers for [shotit](https://github.com/shotit/shotit). Four core workers of shotit: watcher, hasher, loader and searcher.
 
diff --git a/package.json b/package.json
index addda21..984b21a 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "shotit-worker",
-  "version": "0.9.3",
+  "version": "0.9.4",
   "description": "Four core workers of shotit: watcher, hasher, loader and searcher",
   "main": "",
   "type": "module",

From d0961b1d375992b6c58baec1baca162594e00244 Mon Sep 17 00:00:00 2001
From: Leslie-Wong-H <79917148leslie@gmail.com>
Date: Sat, 1 Jul 2023 14:28:37 +0800
Subject: [PATCH 4/4] Style: prettier loader.js

---
 loader.js | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/loader.js b/loader.js
index e2cc451..d726a1d 100644
--- a/loader.js
+++ b/loader.js
@@ -5,7 +5,7 @@ import lzma from "lzma-native";
 import fetch from "node-fetch";
 import { MilvusClient } from "@zilliz/milvus2-sdk-node";
 import cron from "node-cron";
-import lodash from 'lodash';
+import lodash from "lodash";
 import JBC from "jsbi-calculator";
 
 const { calculator, BigDecimal } = JBC;
@@ -200,10 +200,12 @@ const messageHandle = async (data) => {
           };
         }
         return jsonData;
-      }
-      const segments = await Promise.all(chunkedDedupedHashList.map((each, index) => {
-        return modifier(each, chunkedJsonData[index])
-      }));
+      };
+      const segments = await Promise.all(
+        chunkedDedupedHashList.map((each, index) => {
+          return modifier(each, chunkedJsonData[index]);
+        })
+      );
       const jsonData = segments.flat();
 
       // Pause for 5 seconds to make node arrange the compute resource.