|
| 1 | +import stream from "node:stream"; |
| 2 | +import { generate } from "csv-generate"; |
| 3 | +import { parse } from "csv-parse"; |
| 4 | +import { stringify } from "csv-stringify"; |
| 5 | + |
| 6 | +let count = 0; |
| 7 | +// Memory information |
| 8 | +const formatMemoryUsage = (data) => |
| 9 | + `${Math.round((data / 1024 / 1024) * 100) / 100} MB`; |
| 10 | +setInterval(() => { |
| 11 | + const memoryData = process.memoryUsage(); |
| 12 | + const memoryUsage = { |
| 13 | + rss: `${formatMemoryUsage( |
| 14 | + memoryData.rss, |
| 15 | + )} -> Resident Set Size - total memory allocated for the process execution`, |
| 16 | + heapTotal: `${formatMemoryUsage( |
| 17 | + memoryData.heapTotal, |
| 18 | + )} -> total size of the allocated heap`, |
| 19 | + heapUsed: `${formatMemoryUsage( |
| 20 | + memoryData.heapUsed, |
| 21 | + )} -> actual memory used during the execution`, |
| 22 | + external: `${formatMemoryUsage(memoryData.external)} -> V8 external memory`, |
| 23 | + }; |
| 24 | + console.log(`${count} records, usage:`, memoryUsage); |
| 25 | +}, 1000); |
| 26 | + |
| 27 | +async function entryPoint() { |
| 28 | + const sourceFilename = "test_10k.zip"; |
| 29 | + const startIndex = 1; |
| 30 | + const outputDelimiter = ";"; |
| 31 | + const fieldCount = 10; |
| 32 | + const outputColumns = [ |
| 33 | + { key: "Id" }, |
| 34 | + { key: "Address" }, |
| 35 | + { key: "Target" }, |
| 36 | + ...Array.from({ length: fieldCount }, (_, x) => ({ key: `Field${x + 1}` })), |
| 37 | + { key: "Message" }, |
| 38 | + ]; |
| 39 | + // parse the first file in the archive |
| 40 | + const parser = generate({ |
| 41 | + delimiter: ";", |
| 42 | + columns: 3, |
| 43 | + }).pipe( |
| 44 | + parse({ |
| 45 | + delimiter: ";", |
| 46 | + columns: ["Id", "Address", "Target"], |
| 47 | + from: startIndex, |
| 48 | + }), |
| 49 | + ); |
| 50 | + const stringifier = stringify({ |
| 51 | + header: true, |
| 52 | + delimiter: outputDelimiter, |
| 53 | + columns: outputColumns, |
| 54 | + cast: { |
| 55 | + boolean: function (value) { |
| 56 | + return "" + value; |
| 57 | + }, |
| 58 | + }, |
| 59 | + }); |
| 60 | + |
| 61 | + // setup the output, originally s3 Upload (AWS SDK v3) but replaced by noop-stream in test |
| 62 | + const outputStream = await stream.Writable({ |
| 63 | + write: (_, __, callback) => callback(), |
| 64 | + }); |
| 65 | + stringifier.pipe(outputStream); |
| 66 | + // process every record |
| 67 | + let recordIndex = 0; |
| 68 | + let errors = []; |
| 69 | + for await (const record of parser) { |
| 70 | + count++; |
| 71 | + if ( |
| 72 | + !validateRecord( |
| 73 | + record, |
| 74 | + recordIndex, |
| 75 | + ["Id", "Address", "Target"], |
| 76 | + sourceFilename, |
| 77 | + errors, |
| 78 | + ) |
| 79 | + ) |
| 80 | + throw new Error(errors.join(";")); |
| 81 | + const restResults = await queryRestApi(record); |
| 82 | + const outputRecord = processResults(restResults, fieldCount); |
| 83 | + stringifier.write(outputRecord); |
| 84 | + recordIndex++; |
| 85 | + } |
| 86 | + stringifier.end(); |
| 87 | + console.log("==> output ended, " + errors.length + " errors"); |
| 88 | + //await upload.done(); |
| 89 | + console.log("==> content file written"); |
| 90 | +} |
| 91 | + |
| 92 | +function validateRecord(record, recordIndex, expectedFields, filePath, errors) { |
| 93 | + if (recordIndex > 1) return true; |
| 94 | + const recordKeys = Object.keys(record); |
| 95 | + let errorCount = 0; |
| 96 | + for (const expectedField of expectedFields) { |
| 97 | + if (!recordKeys.includes(expectedField)) { |
| 98 | + errors.push( |
| 99 | + `[${expectedField}] is missing in ${filePath}, existing fields are: [${recordKeys.join("],[")}]`, |
| 100 | + ); |
| 101 | + errorCount++; |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + return errorCount == 0; |
| 106 | +} |
| 107 | + |
| 108 | +const sleep = function (ms) { |
| 109 | + return new Promise((resolve) => { |
| 110 | + setTimeout(resolve, ms); |
| 111 | + }); |
| 112 | +}; |
| 113 | + |
| 114 | +async function queryRestApi(record) { |
| 115 | + await sleep(10); |
| 116 | + const restResults = { |
| 117 | + Id: record.Id, |
| 118 | + Address: record.Address, |
| 119 | + Target: record.target, |
| 120 | + Field1: "a", |
| 121 | + Message: "", |
| 122 | + }; |
| 123 | + return restResults; |
| 124 | +} |
| 125 | + |
| 126 | +function processResults(restResults, fieldCount) { |
| 127 | + const resultRecord = restResults; |
| 128 | + resultRecord.Field1 = restResults.Field1.repeat(25); |
| 129 | + for (const index of Array.from({ length: fieldCount - 1 }, (_, x) => x + 2)) |
| 130 | + resultRecord[`Field${index}`] = "b".repeat(25); |
| 131 | + |
| 132 | + return resultRecord; |
| 133 | +} |
| 134 | + |
| 135 | +console.log("starting..."); |
| 136 | +entryPoint() |
| 137 | + .then(console.log("done")) |
| 138 | + .catch((err) => console.log(err)); |
0 commit comments