/**
 * Records the end-of-run details of this execution into the summarizer's
 * fields: number of jobs in the input trace, end time, input trace location
 * and signature, job submission policy, user resolver class name, expected
 * data size, data statistics and total runtime.
 *
 * <p>Note that this is an overload taking arguments, not an override of
 * {@link Object#finalize()}.
 *
 * @param factory the job factory whose {@code numJobsInTrace} is recorded
 * @param inputPath the input trace path; when it is {@code "-"} the trace
 *                  location and signature are both set to
 *                  {@code Summarizer.NA}
 * @param dataSize the expected data size; values that are not positive are
 *                 recorded as {@code Summarizer.NA}
 * @param userResolver the user resolver whose class name is recorded
 * @param stats the data statistics to retain as-is
 * @param conf the configuration used to resolve the trace file system and
 *             the job submission policy
 * @throws IOException declared by this method; the file-system lookup and
 *                     the trace-signature computation are the calls made
 *                     here that touch the trace path
 */
@SuppressWarnings("unchecked")
void finalize(JobFactory factory, String inputPath, long dataSize,
    UserResolver userResolver, DataStatistics stats, Configuration conf)
    throws IOException {
  numJobsInInputTrace = factory.numJobsInTrace;
  endTime = System.currentTimeMillis();

  if (!"-".equals(inputPath)) {
    // a real path was supplied: qualify it against its file system first,
    // then compute the trace signature
    Path tracePath = new Path(inputPath);
    FileSystem traceFs = tracePath.getFileSystem(conf);
    inputTraceLocation = traceFs.makeQualified(tracePath).toString();
    inputTraceSignature = getTraceSignature(inputPath);
  } else {
    // "-" carries no location to qualify and no signature to compute
    inputTraceLocation = Summarizer.NA;
    inputTraceSignature = Summarizer.NA;
  }

  jobSubmissionPolicy = Gridmix.getJobSubmissionPolicy(conf).name();
  resolver = userResolver.getClass().getName();
  expectedDataSize = (dataSize > 0)
      ? StringUtils.humanReadableInt(dataSize)
      : Summarizer.NA;
  dataStats = stats;

  // measured last so that the work done above is included in the runtime
  totalRuntime = System.currentTimeMillis() - getStartTime();
}
/**
 * Renders the given data statistics as a one-line summary of the form
 * {@code "<Compressed|Uncompressed> input data size: <size>, Number of
 * files: <count>"}.
 *
 * @param stats the statistics to render; may be {@code null}
 * @return the rendered summary, or {@code Summarizer.NA} when
 *         {@code stats} is {@code null}
 */
static String stringifyDataStatistics(DataStatistics stats) {
  if (stats == null) {
    return Summarizer.NA;
  }

  final String sizeLabel =
      (stats.isDataCompressed() ? "Compressed" : "Uncompressed")
      + " input data size: ";

  return sizeLabel
      + StringUtils.humanReadableInt(stats.getDataSize())
      + ", "
      + "Number of files: "
      + stats.getNumFiles();
}
/**
 * Finalizes the summarizer by handing every argument, unchanged and in the
 * same order, to the execution summarizer's {@code finalize}.
 *
 * <p>This is an overload taking arguments, not an override of
 * {@link Object#finalize()}.
 *
 * @param factory forwarded to the execution summarizer
 * @param path forwarded to the execution summarizer
 * @param size forwarded to the execution summarizer
 * @param resolver forwarded to the execution summarizer
 * @param stats forwarded to the execution summarizer
 * @param conf forwarded to the execution summarizer
 * @throws IOException propagated from the execution summarizer
 */
@SuppressWarnings("unchecked")
void finalize(JobFactory factory, String path, long size,
    UserResolver resolver, DataStatistics stats, Configuration conf)
    throws IOException {
  executionSummarizer.finalize(factory, path, size, resolver, stats, conf);
}
/** Publishes compression related data statistics. Following statistics are * published * <ul> * <li>Total compressed input data size</li> * <li>Number of compressed input data files</li> * <li>Compression Ratio</li> * <li>Text data dictionary size</li> * <li>Random text word size</li> * </ul> */ static DataStatistics publishCompressedDataStatistics(Path inputDir, Configuration conf, long uncompressedDataSize) throws IOException { FileSystem fs = inputDir.getFileSystem(conf); CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf); // iterate over compressed files and sum up the compressed file sizes long compressedDataSize = 0; int numCompressedFiles = 0; // obtain input data file statuses FileStatus[] outFileStatuses = fs.listStatus(inputDir, new Utils.OutputFileUtils.OutputFilesFilter()); for (FileStatus status : outFileStatuses) { // check if the input file is compressed if (compressionCodecs != null) { CompressionCodec codec = compressionCodecs.getCodec(status.getPath()); if (codec != null) { ++numCompressedFiles; compressedDataSize += status.getLen(); } } } LOG.info("Gridmix is configured to use compressed input data."); // publish the input data size LOG.info("Total size of compressed input data : " + StringUtils.humanReadableInt(compressedDataSize)); LOG.info("Total number of compressed input data files : " + numCompressedFiles); if (numCompressedFiles == 0) { throw new RuntimeException("No compressed file found in the input" + " directory : " + inputDir.toString() + ". To enable compression" + " emulation, run Gridmix either with " + " an input directory containing compressed input file(s) or" + " use the -generate option to (re)generate it. 
If compression" + " emulation is not desired, disable it by setting '" + COMPRESSION_EMULATION_ENABLE + "' to 'false'."); } // publish compression ratio only if its generated in this gridmix run if (uncompressedDataSize > 0) { // compute the compression ratio double ratio = ((double)compressedDataSize) / uncompressedDataSize; // publish the compression ratio LOG.info("Input Data Compression Ratio : " + ratio); } return new DataStatistics(compressedDataSize, numCompressedFiles, true); }