问题
So I have a process that selects from a table. I partition my select programmatically into 20 sub-selects. I then go through each one of those selects and stream its data to an indexing client (Solr). With every select, memory jumps up and is held until I get an OOM.
I logged when each query went off, which can be seen in the following charts:
These correlate with each jump in this memory graph:
14 of 20 queries ran before I oomed.
I see this behavior with code that is similar, but with a delta that runs every 15 minutes. Every delta holds on to some memory until it eventually causes the server to crash with an OOM (from which it recovers).
I have tried to track down issues with the delta in the past, but gave up and just created a way to gracefully restart. What am I missing here?
Here is my entire code chain that makes this work... I know it's a lot to look through, but I figured as much detail as possible would help.
Library Stack:
"node": "~11.10.1"
"knex": "^0.20.9",
"oracledb": "^4.0.0"
"camelize2": "^1.0.0"
Knex - DB connection factory
'use strict'
const objection = require('objection')
const knex = require('knex')
module.exports = function ObjectionFactory(log) {
class MyObjection extends objection.Model {
constructor() {
super()
}
static get tableName() {
return ''
}
}
MyObjection.pickJsonSchemaProperties = true
log.info('Connecting to Oracle Pluggable...', {
host: 'myHost',
username: 'myUser',
database: 'myDatabase"
})
const knexInstance = knex({
client: 'oracledb',
connection: 'connectionInfo',
pool: {
min: 0,
max: 10
},
acquireConnectionTimeout: 10000
})
process.once('SIGINT', () => {
log.info('Disconnecting from Oracle Pluggable.')
knexInstance.destroy()
.then(() => process.exit(0))
.catch(() => process.exit(1))
})
// Shut down cleanly for nodemon
process.once('SIGUSR2', () => {
log.info('Disconnecting from Oracle Pluggable')
knexInstance.destroy()
.then(() => process.kill(process.pid, 'SIGUSR2'))
.catch(() => process.kill(process.pid, 'SIGUSR2'))
})
const knexBoundClass = MyObjection.bindKnex(knexInstance)
knexBoundClass.tag = 'Oracle Connection'
return knexBoundClass
}
My Select Stream Code:
module.exports = function oracleStream(log, MyObjection) {
const knex = MyObjection.knex()
const fetchArraySize = 10000
const outFormat = oracledb.OBJECT
return {
selectStream
}
async function selectStream(sql, bindings = [], fetchSize = fetchArraySize) {
let connection = await knex.client.acquireConnection()
log.info(`Fetch size is set to ${fetchSize}`)
let select = connection.queryStream(sql, bindings, {
fetchArraySize: fetchSize,
outFormat: outFormat
})
select.on('error', (err) => {
log.error('Oracle Error Event', err)
knex.client.releaseConnection(connection)
})
select.on('end', () => {
log.info('Destroying the Stream')
select.destroy()
})
select.on('close', () => {
log.info('Oracle Close Event')
knex.client.releaseConnection(connection)
select = null
connection = null
})
return select
}
}
My index/stream pipeline code
/**
 * Runs every partitioned query in turn (last array entry first) and streams
 * its rows through camelizeAndStringify() into streamReindex(core).
 *
 * While a query is running, progress is logged every five seconds. Any
 * pipeline failure is rethrown wrapped in JobFailedError; the database stream
 * is destroyed and the progress timer cleared whether or not it failed.
 *
 * @returns {Promise<void>}
 * @throws {JobFailedError} When the pipeline for any query fails.
 */
async function indexJob() {
  const reindexStartTime = new moment().local()
  let rowCount = 0
  log.info('Reindex Started at', reindexStartTime.format())

  const partitionedQueries = ['Select * from table where 1=1', 'Select * from table where 2=2', 'Select * from table where 3=3'] //There would be 20 queries in this array
  const totalQueries = partitionedQueries.length

  for (let queryNumber = 1; partitionedQueries.length > 0; queryNumber++) {
    const query = partitionedQueries.pop()
    log.info('RUNNING Query', {
      queryNumber: `${queryNumber} of ${totalQueries}`,
      query: query
    })

    // 10k represents the oracle fetch size
    const databaseStream = await oracleStream.selectStream(query, [], 10000)

    // Count rows as they pass so the periodic status log can report progress.
    databaseStream.on('data', function countRow() {
      rowCount += 1
    })

    const statusTimer = setInterval(function logStatus() {
      log.info('Status: ', getReindexInfo(reindexStartTime, rowCount))
    }, 5000)

    try {
      const runPipeline = util.promisify(stream.pipeline)
      const stages = [
        databaseStream,
        camelizeAndStringify(),
        streamReindex(core)
      ]
      await runPipeline(...stages)
    } catch (err) {
      databaseStream.destroy(err)
      throw new JobFailedError(err)
    } finally {
      databaseStream.destroy()
      clearInterval(statusTimer)
    }
  }
}
/**
 * Creates an object-mode Transform that turns a sequence of row objects into
 * the text of a single JSON array, one fragment per row.
 *
 * Each incoming row is passed through `camelize` and `JSON.stringify`. The
 * first fragment is prefixed with '[', later ones with ',', and the closing
 * ']' is emitted on flush. When no rows arrive at all, '[]' is emitted so the
 * output is still valid JSON (previously a lone ']' was produced).
 *
 * @returns {Transform} The serializing transform stream.
 */
function camelizeAndStringify() {
  // True until the first row has been written; selects the fragment prefix
  // and tells flush() whether the array was ever opened.
  let first = true

  return new Transform({
    objectMode: true,
    highWaterMark: 1000,
    transform(chunk, encoding, callback) {
      const prefix = first ? '[' : ','
      this.push(prefix + JSON.stringify(camelize(chunk)))
      first = false
      callback()
    },
    flush(callback) {
      // Open and close the array in one go if it was never opened.
      this.push(first ? '[]' : ']')
      callback()
    }
  })
}
/**
 * Opens an HTTPS POST request to `<baseUrl><core>/update` and returns the
 * request object so the JSON payload can be streamed into it.
 *
 * When the response arrives with a status other than 200, the request is
 * destroyed with an Error whose message is the JSON-serialized status and
 * headers.
 * NOTE(review): a consumer that only waits for the request to finish writing
 * may complete before the response status is known — confirm whether callers
 * need to wait for the response as well.
 *
 * @param {string} core - Core name appended to `baseUrl`.
 * @returns {object} The request returned by `https.request` (writable).
 */
function streamReindex(core) {
  const updateUrl = baseUrl + core + '/update'
  const options = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    'auth': `${user.username}:${user.password}`,
  }

  const postStream = https.request(updateUrl, options, (res) => {
    // Once a response handler is attached, the response body must be
    // consumed; otherwise it stays buffered in memory and the socket is never
    // freed. The body is not needed here, so drain and discard it.
    res.resume()

    const response = {
      status: {
        code: res.statusCode,
        message: res.statusMessage
      },
      headers: res.headers,
    }
    if (res.statusCode !== 200) {
      postStream.destroy(new Error(JSON.stringify(response)))
    }
  })

  // No 'error' listener that throws is installed here: throwing from inside
  // an event listener surfaces as an uncaught exception and runs before any
  // listener added later (e.g. by stream.pipeline), so the caller's own error
  // handling never saw the failure. Errors are left to whoever consumes the
  // returned stream.

  postStream.on('socket', (socket) => {
    socket.setKeepAlive(true, 110000)
  })

  return postStream
}
来源:https://stackoverflow.com/questions/61259942/nodejs-streams-help-find-my-memory-leak