private void doWork() { while (shouldRun) { try { // There's no point in triggering a log roll if the Standby hasn't // read any more transactions since the last time a roll was // triggered. if (tooLongSinceLastLoad() && lastRollTriggerTxId < lastLoadedTxnId) { triggerActiveLogRoll(); } /** * Check again in case someone calls {@link EditLogTailer#stop} while * we're triggering an edit log roll, since ipc.Client catches and * ignores {@link InterruptedException} in a few places. This fixes * the bug described in HDFS-2823. */ if (!shouldRun) { break; } doTailEdits(); } catch (EditLogInputException elie) { LOG.warn("Error while reading edits from disk. Will try again.", elie); } catch (InterruptedException ie) { // interrupter should have already set shouldRun to false continue; } catch (Throwable t) { LOG.fatal("Unknown error encountered while tailing edits. " + "Shutting down standby NN.", t); terminate(1, t); } try { Thread.sleep(sleepTimeMs); } catch (InterruptedException e) { LOG.warn("Edit log tailer interrupted", e); } } }
@VisibleForTesting void doTailEdits() throws IOException, InterruptedException { // Write lock needs to be interruptible here because the // transitionToActive RPC takes the write lock before calling // tailer.stop() -- so if we're not interruptible, it will // deadlock. namesystem.writeLockInterruptibly(); try { FSImage image = namesystem.getFSImage(); long lastTxnId = image.getLastAppliedTxId(); if (LOG.isDebugEnabled()) { LOG.debug("lastTxnId: " + lastTxnId); } Collection<EditLogInputStream> streams; try { streams = editLog.selectInputStreams(lastTxnId + 1, 0, null, false); } catch (IOException ioe) { // This is acceptable. If we try to tail edits in the middle of an edits // log roll, i.e. the last one has been finalized but the new inprogress // edits file hasn't been started yet. LOG.warn("Edits tailer failed to find any streams. Will try again " + "later.", ioe); return; } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); } // Once we have streams to load, errors encountered are legitimate cause // for concern, so we don't catch them here. Simple errors reading from // disk are ignored. long editsLoaded = 0; try { editsLoaded = image.loadEdits(streams, namesystem); } catch (EditLogInputException elie) { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.info(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } } if (editsLoaded > 0) { lastLoadTimeMs = monotonicNow(); } lastLoadedTxnId = image.getLastAppliedTxId(); } finally { namesystem.writeUnlock(); } }
private void doWork() { while (shouldRun) { try { // There's no point in triggering a log roll if the Standby hasn't // read any more transactions since the last time a roll was // triggered. if (tooLongSinceLastLoad() && lastRollTriggerTxId < lastLoadedTxnId) { triggerActiveLogRoll(); } /** * Check again in case someone calls {@link EditLogTailer#stop} while * we're triggering an edit log roll, since ipc.Client catches and * ignores {@link InterruptedException} in a few places. This fixes * the bug described in HDFS-2823. */ if (!shouldRun) { break; } // Prevent reading of name system while being modified. The full // name system lock will be acquired to further block even the block // state updates. namesystem.cpLockInterruptibly(); try { doTailEdits(); } finally { namesystem.cpUnlock(); } } catch (EditLogInputException elie) { LOG.warn("Error while reading edits from disk. Will try again.", elie); } catch (InterruptedException ie) { // interrupter should have already set shouldRun to false continue; } catch (Throwable t) { LOG.fatal("Unknown error encountered while tailing edits. " + "Shutting down standby NN.", t); terminate(1, t); } try { Thread.sleep(sleepTimeMs); } catch (InterruptedException e) { LOG.warn("Edit log tailer interrupted", e); } } }
@VisibleForTesting void doTailEdits() throws IOException, InterruptedException { // Write lock needs to be interruptible here because the // transitionToActive RPC takes the write lock before calling // tailer.stop() -- so if we're not interruptible, it will // deadlock. namesystem.writeLockInterruptibly(); try { FSImage image = namesystem.getFSImage(); long lastTxnId = image.getLastAppliedTxId(); if (LOG.isDebugEnabled()) { LOG.debug("lastTxnId: " + lastTxnId); } Collection<EditLogInputStream> streams; try { streams = editLog.selectInputStreams(lastTxnId + 1, 0, null, false); } catch (IOException ioe) { // This is acceptable. If we try to tail edits in the middle of an edits // log roll, i.e. the last one has been finalized but the new inprogress // edits file hasn't been started yet. LOG.warn("Edits tailer failed to find any streams. Will try again " + "later.", ioe); return; } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); } // Once we have streams to load, errors encountered are legitimate cause // for concern, so we don't catch them here. Simple errors reading from // disk are ignored. long editsLoaded = 0; try { editsLoaded = image.loadEdits(streams, namesystem); } catch (EditLogInputException elie) { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.debug(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } } if (editsLoaded > 0) { lastLoadTimeMs = monotonicNow(); } lastLoadedTxnId = image.getLastAppliedTxId(); } finally { namesystem.writeUnlock(); } }
private void doWork() { while (shouldRun) { try { // There's no point in triggering a log roll if the Standby hasn't // read any more transactions since the last time a roll was // triggered. if (tooLongSinceLastLoad() && lastRollTriggerTxId < lastLoadedTxnId) { triggerActiveLogRoll(); } /** * Check again in case someone calls {@link EditLogTailer#stop} while * we're triggering an edit log roll, since ipc.Client catches and * ignores {@link InterruptedException} in a few places. This fixes * the bug described in HDFS-2823. */ if (!shouldRun) { break; } // Prevent reading of name system while being modified. The full // name system lock will be acquired to further block even the block // state updates. namesystem.cpLockInterruptibly(); try { doTailEdits(); } finally { namesystem.cpUnlock(); } //Update NameDirSize Metric namesystem.getFSImage().getStorage().updateNameDirSize(); } catch (EditLogInputException elie) { LOG.warn("Error while reading edits from disk. Will try again.", elie); } catch (InterruptedException ie) { // interrupter should have already set shouldRun to false continue; } catch (Throwable t) { LOG.fatal("Unknown error encountered while tailing edits. " + "Shutting down standby NN.", t); terminate(1, t); } try { Thread.sleep(sleepTimeMs); } catch (InterruptedException e) { LOG.warn("Edit log tailer interrupted", e); } } }
@VisibleForTesting void doTailEdits() throws IOException, InterruptedException { // Write lock needs to be interruptible here because the // transitionToActive RPC takes the write lock before calling // tailer.stop() -- so if we're not interruptible, it will // deadlock. namesystem.writeLockInterruptibly(); try { FSImage image = namesystem.getFSImage(); long lastTxnId = image.getLastAppliedTxId(); if (LOG.isDebugEnabled()) { LOG.debug("lastTxnId: " + lastTxnId); } Collection<EditLogInputStream> streams; try { streams = editLog.selectInputStreams(lastTxnId + 1, 0, null, false); } catch (IOException ioe) { // This is acceptable. If we try to tail edits in the middle of an edits // log roll, i.e. the last one has been finalized but the new inprogress // edits file hasn't been started yet. LOG.warn("Edits tailer failed to find any streams. Will try again " + "later.", ioe); return; } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); } // Once we have streams to load, errors encountered are legitimate cause // for concern, so we don't catch them here. Simple errors reading from // disk are ignored. long editsLoaded = 0; try { editsLoaded = image.loadEdits(streams, namesystem); } catch (EditLogInputException elie) { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.info(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } } if (editsLoaded > 0) { lastLoadTimestamp = now(); } lastLoadedTxnId = image.getLastAppliedTxId(); } finally { namesystem.writeUnlock(); } }
@VisibleForTesting void doTailEdits() throws IOException, InterruptedException { // Write lock needs to be interruptible here because the // transitionToActive RPC takes the write lock before calling // tailer.stop() -- so if we're not interruptible, it will // deadlock. namesystem.writeLockInterruptibly(); try { FSImage image = namesystem.getFSImage(); long lastTxnId = image.getLastAppliedTxId(); if (LOG.isDebugEnabled()) { LOG.debug("lastTxnId: " + lastTxnId); } Collection<EditLogInputStream> streams; try { streams = editLog.selectInputStreams(lastTxnId + 1, 0, null, false); } catch (IOException ioe) { // This is acceptable. If we try to tail edits in the middle of an edits // log roll, i.e. the last one has been finalized but the new inprogress // edits file hasn't been started yet. LOG.warn("Edits tailer failed to find any streams. Will try again " + "later.", ioe); return; } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); } // Once we have streams to load, errors encountered are legitimate cause // for concern, so we don't catch them here. Simple errors reading from // disk are ignored. long editsLoaded = 0; try { editsLoaded = image.loadEdits(streams, namesystem, null); } catch (EditLogInputException elie) { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.info(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } } if (editsLoaded > 0) { lastLoadTimestamp = now(); } lastLoadedTxnId = image.getLastAppliedTxId(); } finally { namesystem.writeUnlock(); } }