node.js - Read a very large file (~10 GB), process it line by line, then write to another file


I have a 10 GB log file in a particular format. I want to process this file line by line and write the output to another file after applying some transformations. I am using Node for this operation.

Though this method works fine, it takes a hell of a lot of time to do this. I was able to do it within 30-45 minutes in Java, but in Node it is taking more than 160 minutes to do the same job. The following is the code:

The following is the initiation code, which reads each line from the input.

var path = '../10gb_input_file.txt';
var output_file = '../output.txt';

/**
 * Entry point: removes any output file left over from a previous run, then
 * streams the input file line by line and hands each line to
 * `perform_line_ops`.
 *
 * Every line is split on ';' and the resulting array is passed along together
 * with its fields at index 6, 7 and 10 as separate arguments.
 *
 * @throws {Error} If the stale output file exists but cannot be deleted.
 */
function fileopsmain() {
    // Delete synchronously, before any reading starts. An asynchronous
    // exists/unlink pair can complete after the first lines have already
    // been appended, silently discarding fresh output.
    try {
        fs.unlinkSync(output_file);
        console.log('successfully deleted ' + output_file);
    } catch (err) {
        // A missing file simply means there is nothing to clean up.
        if (err.code !== 'ENOENT') {
            throw err;
        }
    }

    // `highWaterMark` is the stream option that controls the read chunk size.
    new lazy(fs.createReadStream(path, {highWaterMark: 128 * 4096}))
        .lines
        .forEach(function (line) {
            var line_arr = line.toString().split(';');
            perform_line_ops(line_arr, line_arr[6], line_arr[7], line_arr[10]);
        });
}

This method performs some operation on the line and passes the result to the write method, which writes it to the output file.

/**
 * Builds the output text for one parsed input line and passes it to
 * `write_line_ops`.
 *
 * @param {string[]} line_arr - Fields of the input line.
 * @param {string} range_start - Not read by the code shown here.
 * @param {string} range_end - Not read by the code shown here.
 * @param {string} daynums - Not read by the code shown here.
 */
function perform_line_ops(line_arr, range_start, range_end, daynums) {
    var _new_lines = '';

    // NOTE(review): `days` is not declared in this function and the
    // `daynums` parameter is never read -- confirm whether `days` is a
    // global defined elsewhere or whether `daynums` was intended.
    for (var i = 0; i < days; i++) {
        // Placeholder: perform the operation that modifies the line and
        // collect the result in `_new_lines` so it is passed on for writing.
    }

    write_line_ops(_new_lines);
}

The following method is used to write data to the new file.

/**
 * Appends `line` to the output file. Null, undefined and empty values are
 * skipped so no write is issued for them.
 *
 * The append is synchronous: the call returns only after the data has been
 * handed to the file system.
 *
 * @param {string} line - Text to append to `output_file`.
 */
function write_line_ops(line) {
    // Loose comparisons are kept on purpose so the set of skipped values
    // stays exactly what callers already rely on.
    if (line == null || line == '') {
        return;
    }
    fs.appendFileSync(output_file, line);
}

I want to bring this time down to 15-20 minutes. Is it possible to do so?

Also, for the record, I'm trying this on an Intel i7 processor with 8 GB of RAM.

You can do this without a module. For example:

var fs = require('fs');
var inspect = require('util').inspect;

// Holds the trailing partial line of the previous chunk until the rest of
// that line arrives with a later chunk.
var buffer = '';

// Decode as UTF-8 inside the stream so a multi-byte character that straddles
// two chunks is not corrupted by converting each chunk on its own.
var rs = fs.createReadStream('foo.log', {encoding: 'utf8'});

rs.on('data', function(chunk) {
  var lines = (buffer + chunk).split(/\r?\n/g);
  // The last element is either an incomplete line or '' when the chunk ended
  // exactly on a newline; keep it for the next chunk.
  buffer = lines.pop();
  for (var i = 0; i < lines.length; ++i) {
    // do something with `lines[i]`
    console.log('found line: ' + inspect(lines[i]));
  }
});

rs.on('end', function() {
  // optionally process `buffer` here if you want to treat leftover data
  // without a newline as a "line"
  console.log('ended on non-empty buffer: ' + inspect(buffer));
});

Comments