pull file streaming out into its own file, leave a few notes
This commit is contained in:
parent
ccd6e1d6df
commit
35eca2c5d2
@ -10,11 +10,9 @@ var Util = require("../common-util");
|
|||||||
var Meta = require("../metadata");
|
var Meta = require("../metadata");
|
||||||
var Extras = require("../hk-util");
|
var Extras = require("../hk-util");
|
||||||
|
|
||||||
const Schedule = require("../schedule");
|
const readFileBin = require("../stream-file").readFileBin;
|
||||||
const Readline = require("readline");
|
|
||||||
const ToPull = require('stream-to-pull-stream');
|
|
||||||
const Pull = require('pull-stream');
|
|
||||||
|
|
||||||
|
const Schedule = require("../schedule");
|
||||||
const isValidChannelId = function (id) {
|
const isValidChannelId = function (id) {
|
||||||
return typeof(id) === 'string' &&
|
return typeof(id) === 'string' &&
|
||||||
id.length >= 32 && id.length < 50 &&
|
id.length >= 32 && id.length < 50 &&
|
||||||
@ -60,13 +58,24 @@ var channelExists = function (filepath, cb) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// readMessagesBin streams the log of channel `id`, beginning at byte offset
// `start`, and hands every newline-delimited message to `msgHandler`.
// The actual iteration is delegated to readFileBin (../stream-file), so the
// handler receives the callbacks that readFileBin supplies for requesting the
// next message or aborting, and `cb` is invoked by readFileBin when it is done.
// Nothing is returned.
const readMessagesBin = (env, id, start, msgHandler, cb) => {
    const channelPath = mkPath(env, id);
    const options = { start: start };
    const stream = Fs.createReadStream(channelPath, options);
    readFileBin(env, stream, msgHandler, cb);
};
|
||||||
|
|
||||||
// reads classic metadata from a channel log and aborts
|
// reads classic metadata from a channel log and aborts
|
||||||
// returns undefined if the first message was not an object (not an array)
|
// returns undefined if the first message was not an object (not an array)
|
||||||
var getMetadataAtPath = function (Env, path, _cb) {
|
var getMetadataAtPath = function (Env, path, _cb) {
|
||||||
var stream;
|
const stream = Fs.createReadStream(path, { start: 0 });
|
||||||
|
|
||||||
// cb implicitly destroys the stream, if it exists
|
// cb implicitly destroys the stream, if it exists
|
||||||
// and calls back asynchronously no more than once
|
// and calls back asynchronously no more than once
|
||||||
|
/*
|
||||||
var cb = Util.once(Util.both(function () {
|
var cb = Util.once(Util.both(function () {
|
||||||
try {
|
try {
|
||||||
stream.destroy();
|
stream.destroy();
|
||||||
@ -74,20 +83,26 @@ var getMetadataAtPath = function (Env, path, _cb) {
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
}, Util.mkAsync(_cb)));
|
}, Util.mkAsync(_cb)));
|
||||||
|
*/
|
||||||
|
|
||||||
// stream creation emit errors... probably ENOENT
|
var cb = Util.once(Util.mkAsync(_cb), function () {
|
||||||
stream = Fs.createReadStream(path, { encoding: 'utf8' }).on('error', cb);
|
throw new Error("Multiple Callbacks");
|
||||||
|
|
||||||
// stream lines
|
|
||||||
const rl = Readline.createInterface({
|
|
||||||
input: stream,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
var i = 0;
|
var i = 0;
|
||||||
rl
|
return readFileBin(Env, stream, function (msgObj, readMore, abort) {
|
||||||
.on('line', function (line) {
|
const line = msgObj.buff.toString('utf8');
|
||||||
|
|
||||||
|
if (!line) {
|
||||||
|
return readMore();
|
||||||
|
}
|
||||||
|
|
||||||
// metadata should always be on the first line or not exist in the channel at all
|
// metadata should always be on the first line or not exist in the channel at all
|
||||||
if (i++ > 0) { return void cb(); }
|
if (i++ > 0) {
|
||||||
|
console.log("aborting");
|
||||||
|
abort();
|
||||||
|
return void cb();
|
||||||
|
}
|
||||||
var metadata;
|
var metadata;
|
||||||
try {
|
try {
|
||||||
metadata = JSON.parse(line);
|
metadata = JSON.parse(line);
|
||||||
@ -102,9 +117,10 @@ var getMetadataAtPath = function (Env, path, _cb) {
|
|||||||
// if you can't parse, that's bad
|
// if you can't parse, that's bad
|
||||||
return void cb("INVALID_METADATA");
|
return void cb("INVALID_METADATA");
|
||||||
}
|
}
|
||||||
})
|
readMore();
|
||||||
.on('close', cb)
|
}, function (err) {
|
||||||
.on('error', cb);
|
cb(err);
|
||||||
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
var closeChannel = function (env, channelName, cb) {
|
var closeChannel = function (env, channelName, cb) {
|
||||||
@ -150,6 +166,7 @@ var clearChannel = function (env, channelId, _cb) {
|
|||||||
/* readMessages is our classic method of reading messages from the disk
|
/* readMessages is our classic method of reading messages from the disk
|
||||||
notably doesn't provide a means of aborting if you finish early
|
notably doesn't provide a means of aborting if you finish early
|
||||||
*/
|
*/
|
||||||
|
// XXX replicate current API on top of readMessagesBin
|
||||||
var readMessages = function (path, msgHandler, cb) {
|
var readMessages = function (path, msgHandler, cb) {
|
||||||
var remainder = '';
|
var remainder = '';
|
||||||
var stream = Fs.createReadStream(path, { encoding: 'utf8' });
|
var stream = Fs.createReadStream(path, { encoding: 'utf8' });
|
||||||
@ -186,6 +203,7 @@ var getChannelMetadata = function (Env, channelId, cb) {
|
|||||||
// low level method for getting just the dedicated metadata channel
|
// low level method for getting just the dedicated metadata channel
|
||||||
var getDedicatedMetadata = function (env, channelId, handler, cb) {
|
var getDedicatedMetadata = function (env, channelId, handler, cb) {
|
||||||
var metadataPath = mkMetadataPath(env, channelId);
|
var metadataPath = mkMetadataPath(env, channelId);
|
||||||
|
// XXX use readFileBin
|
||||||
readMessages(metadataPath, function (line) {
|
readMessages(metadataPath, function (line) {
|
||||||
if (!line) { return; }
|
if (!line) { return; }
|
||||||
try {
|
try {
|
||||||
@ -266,75 +284,6 @@ var writeMetadata = function (env, channelId, data, cb) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// transform a stream of arbitrarily divided data
|
|
||||||
// into a stream of buffers divided by newlines in the source stream
|
|
||||||
// TODO see if we could improve performance by using libnewline
|
|
||||||
const NEWLINE_CHR = ('\n').charCodeAt(0);
|
|
||||||
const mkBufferSplit = () => {
|
|
||||||
let remainder = null;
|
|
||||||
return Pull((read) => {
|
|
||||||
return (abort, cb) => {
|
|
||||||
read(abort, function (end, data) {
|
|
||||||
if (end) {
|
|
||||||
if (data) { console.log("mkBufferSplit() Data at the end"); }
|
|
||||||
cb(end, remainder ? [remainder, data] : [data]);
|
|
||||||
remainder = null;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const queue = [];
|
|
||||||
for (;;) {
|
|
||||||
const offset = data.indexOf(NEWLINE_CHR);
|
|
||||||
if (offset < 0) {
|
|
||||||
remainder = remainder ? Buffer.concat([remainder, data]) : data;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let subArray = data.slice(0, offset);
|
|
||||||
if (remainder) {
|
|
||||||
subArray = Buffer.concat([remainder, subArray]);
|
|
||||||
remainder = null;
|
|
||||||
}
|
|
||||||
queue.push(subArray);
|
|
||||||
data = data.slice(offset + 1);
|
|
||||||
}
|
|
||||||
cb(end, queue);
|
|
||||||
});
|
|
||||||
};
|
|
||||||
}, Pull.flatten());
|
|
||||||
};
|
|
||||||
|
|
||||||
// return a streaming function which transforms buffers into objects
|
|
||||||
// containing the buffer and the offset from the start of the stream
|
|
||||||
const mkOffsetCounter = () => {
|
|
||||||
let offset = 0;
|
|
||||||
return Pull.map((buff) => {
|
|
||||||
const out = { offset: offset, buff: buff };
|
|
||||||
// +1 for the eaten newline
|
|
||||||
offset += buff.length + 1;
|
|
||||||
return out;
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
// readMessagesBin asynchronously iterates over the messages in a channel log
|
|
||||||
// the handler for each message must call back to read more, which should mean
|
|
||||||
// that this function has a lower memory profile than our classic method
|
|
||||||
// of reading logs line by line.
|
|
||||||
// it also allows the handler to abort reading at any time
|
|
||||||
const readMessagesBin = (env, id, start, msgHandler, cb) => {
|
|
||||||
const stream = Fs.createReadStream(mkPath(env, id), { start: start });
|
|
||||||
let keepReading = true;
|
|
||||||
Pull(
|
|
||||||
ToPull.read(stream),
|
|
||||||
mkBufferSplit(),
|
|
||||||
mkOffsetCounter(),
|
|
||||||
Pull.asyncMap((data, moreCb) => {
|
|
||||||
msgHandler(data, moreCb, () => { keepReading = false; moreCb(); });
|
|
||||||
}),
|
|
||||||
Pull.drain(() => (keepReading), (err) => {
|
|
||||||
cb((keepReading) ? err : undefined);
|
|
||||||
})
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
// check if a file exists at $path
|
// check if a file exists at $path
|
||||||
var checkPath = function (path, callback) {
|
var checkPath = function (path, callback) {
|
||||||
Fs.stat(path, function (err) {
|
Fs.stat(path, function (err) {
|
||||||
@ -428,6 +377,7 @@ var removeArchivedChannel = function (env, channelName, cb) {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// XXX use ../plan.js
|
||||||
var listChannels = function (root, handler, cb) {
|
var listChannels = function (root, handler, cb) {
|
||||||
// do twenty things at a time
|
// do twenty things at a time
|
||||||
var sema = Semaphore.create(20);
|
var sema = Semaphore.create(20);
|
||||||
|
|||||||
76
lib/stream-file.js
Normal file
76
lib/stream-file.js
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/* jshint esversion: 6 */
|
||||||
|
/* global Buffer */
|
||||||
|
|
||||||
|
const ToPull = require('stream-to-pull-stream');
|
||||||
|
const Pull = require('pull-stream');
|
||||||
|
|
||||||
|
const Stream = module.exports;
|
||||||
|
|
||||||
|
// transform a stream of arbitrarily divided data
// into a stream of buffers divided by newlines in the source stream
// TODO see if we could improve performance by using libnewline
const NEWLINE_CHR = ('\n').charCodeAt(0);

// mkBufferSplit returns a pull-stream "through" which consumes buffers of
// arbitrary size and emits one buffer per line, with the newline removed.
// Bytes following the last newline of a chunk are kept in `remainder` and
// prepended to the next chunk.
// When the source ends cleanly, a non-empty remainder (a final line with no
// trailing newline) is emitted as one last item before the end is signalled.
// When reading is aborted or the source fails, the remainder is discarded.
const mkBufferSplit = () => {
    // bytes seen since the most recent newline (may be an empty buffer)
    let remainder = null;
    // end signal received from upstream but not yet forwarded, because the
    // trailing line had to be delivered first
    let pendingEnd = null;
    return Pull((read) => {
        return (abort, cb) => {
            // upstream has already terminated: do not read from it again
            if (pendingEnd) { return void cb(abort || pendingEnd); }
            read(abort, function (end, data) {
                if (end) {
                    if (data) { console.log("mkBufferSplit() Data at the end"); }
                    const last = remainder;
                    remainder = null;
                    // pull-stream consumers ignore any data passed together
                    // with a truthy `end`, so the trailing line has to be sent
                    // as a regular item; the end is forwarded on the next read.
                    // `end === true` is a clean end (anything else is an error)
                    // and `!abort` means downstream still wants data.
                    if (end === true && !abort && last && last.length) {
                        pendingEnd = end;
                        return void cb(null, [last]);
                    }
                    return void cb(end);
                }
                const queue = [];
                for (;;) {
                    const offset = data.indexOf(NEWLINE_CHR);
                    if (offset < 0) {
                        // no (more) newlines in this chunk: keep the tail for later
                        remainder = remainder ? Buffer.concat([remainder, data]) : data;
                        break;
                    }
                    let subArray = data.slice(0, offset);
                    if (remainder) {
                        // complete the line that was started in a previous chunk
                        subArray = Buffer.concat([remainder, subArray]);
                        remainder = null;
                    }
                    queue.push(subArray);
                    // skip over the newline itself
                    data = data.slice(offset + 1);
                }
                // the queue may be empty; flatten() then simply asks for more
                cb(end, queue);
            });
        };
    }, Pull.flatten());
};
|
||||||
|
|
||||||
|
// mkOffsetCounter builds a pull-stream map stage which wraps every incoming
// buffer in an object of the form { offset, buff }, where `offset` is the
// running total of the lengths of all previous buffers plus one byte each
// (accounting for the newline that was stripped from every line).
const mkOffsetCounter = () => {
    let position = 0;
    return Pull.map((buff) => {
        const startsAt = position;
        // advance past this buffer and the newline that followed it
        position = startsAt + buff.length + 1;
        return { offset: startsAt, buff: buff };
    });
};
|
||||||
|
|
||||||
|
// readFileBin asynchronously iterates over the newline-delimited messages
// of a readable stream.
// For every message, msgHandler is called with three arguments:
//   1. an object { offset, buff } describing the message,
//   2. a function to call in order to receive the next message,
//   3. a function to call in order to stop reading.
// Since nothing more is read until the handler asks for it, this should have
// a lower memory profile than reading a whole log line by line up front.
// cb is called once reading has finished: with the value reported by the
// stream pipeline if it ran to its end, or with undefined if the handler
// chose to stop early.
// `env` is accepted but not used here.
Stream.readFileBin = (env, stream, msgHandler, cb) => {
    let stopped = false;

    // pass each message to the handler along with its two continuations
    const dispatch = (msg, next) => {
        const stop = () => {
            stopped = true;
            next();
        };
        msgHandler(msg, next, stop);
    };

    // drain aborts the pipeline as soon as this returns false
    const wantsMore = () => !stopped;

    const finish = (err) => {
        cb(stopped ? undefined : err);
    };

    Pull(
        ToPull.read(stream),
        mkBufferSplit(),
        mkOffsetCounter(),
        Pull.asyncMap(dispatch),
        Pull.drain(wantsMore, finish)
    );
};
|
||||||
Loading…
x
Reference in New Issue
Block a user