I have a 10 GB log file in a particular format. I want to process this file line by line and write the output to another file after applying some transformations. I am using Node for this operation.
Though this method works fine, it takes a hell of a lot of time to do this. I was able to do this within 30-45 minutes in Java, but in Node it is taking more than 160 minutes to do the same job. Following is the code:
The following is the initiation code, which reads each line from the input.
var path = '../10gb_input_file.txt';
var output_file = '../output.txt';

/**
 * Entry point: removes any previous output file, then reads the input file
 * line by line and hands every line to perform_line_ops.
 *
 * Each line is split on ';'; the whole field array is passed along together
 * with fields 6, 7 and 10 (as range_start, range_end and daynums).
 *
 * @throws {Error} If an existing output file cannot be deleted.
 */
function fileopsmain() {
  // Delete synchronously, before any line is read. An asynchronous
  // exists()/unlink() pair can complete after the first lines have already
  // been appended and would then throw that output away.
  try {
    fs.unlinkSync(output_file);
    console.log('successfully deleted ' + output_file);
  } catch (err) {
    // ENOENT only means there was no earlier output file to remove.
    if (err.code !== 'ENOENT') throw err;
  }

  // highWaterMark is the read-chunk size option of fs.createReadStream.
  new lazy(fs.createReadStream(path, {highWaterMark: 128 * 4096}))
    .lines
    .forEach(function (line) {
      var line_arr = line.toString().split(';');
      perform_line_ops(line_arr, line_arr[6], line_arr[7], line_arr[10]);
    });
}

// perform_line_ops performs the operation on the line and passes the result
// to the write method, which writes it to the output file.
/**
 * Builds the output text for one input line and passes it to write_line_ops.
 *
 * @param {string[]} line_arr - Fields of the input line.
 * @param {string} range_start - Passed by the caller; unused in the visible body.
 * @param {string} range_end - Passed by the caller; unused in the visible body.
 * @param {string} daynums - Passed by the caller; unused in the visible body.
 */
function perform_line_ops(line_arr, range_start, range_end, daynums) {
  var _new_lines = '';

  // NOTE(review): `days` is not declared in this function; presumably it is
  // defined elsewhere or derived from `daynums` -- confirm.
  for (var i = 0; i < days; i++) {
    // Perform the operation that modifies the line and appends the result to
    // _new_lines (the transformation itself is elided here).
  }

  write_line_ops(_new_lines);
}

// The following method is used to write the data to the new file.
/**
 * Appends one block of output text to the output file.
 *
 * Does nothing when `line` is null, undefined or an empty string.
 *
 * @param {string} line - Text to append to `output_file`.
 */
function write_line_ops(line) {
  // Loose comparisons are intentional: `!= null` also rejects undefined.
  if (line != null && line != '') {
    // The Node API is appendFileSync (camelCase); it blocks until the data
    // has been written.
    fs.appendFileSync(output_file, line);
  }
}

// I want to bring this time down to 15-20 minutes. Is it possible to do so?
Also, for the record, I'm trying this on an Intel i7 processor with 8 GB of RAM.
You can do this without a module. For example:
var fs = require('fs');
var inspect = require('util').inspect;

// Holds the trailing, not-yet-terminated part of the previous chunk so that a
// line split across two chunks is reassembled before it is processed.
var buffer = '';

// Decoding inside the stream keeps a multi-byte UTF-8 character that straddles
// two chunks intact; concatenating raw Buffer chunks would corrupt it.
var rs = fs.createReadStream('foo.log', {encoding: 'utf8'});

rs.on('data', function (chunk) {
  var lines = (buffer + chunk).split(/\r?\n/g);
  // The last element is whatever follows the final newline: an incomplete
  // line (or '' when the chunk ended exactly on a newline).
  buffer = lines.pop();
  for (var i = 0; i < lines.length; ++i) {
    // do something with `lines[i]`
    console.log('found line: ' + inspect(lines[i]));
  }
});

rs.on('end', function () {
  // optionally process `buffer` here if you want to treat leftover data
  // without a newline as a "line"
  console.log('ended on non-empty buffer: ' + inspect(buffer));
});
Comments
Post a Comment