public void checkOutputSpecs(JobContext job ) throws FileAlreadyExistsException, IOException{ // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set."); } // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration()); if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } }
public void checkOutputSpecs(JobContext job ) throws FileAlreadyExistsException, IOException{ // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set."); } // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] {outDir}, job.getConfiguration()); if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } }
/**
 * Overridden so that an export directory which already exists is accepted
 * instead of being reported as an error.
 *
 * <p>Only a missing output path is rejected. A delegation token for the
 * output directory's file system is still requested.
 *
 * @param job the job whose output configuration is checked
 * @throws InvalidJobConfException if no output directory is configured
 * @throws IOException if obtaining the delegation token fails
 */
@Override
public void checkOutputSpecs(JobContext job)
    throws FileAlreadyExistsException, IOException {
  final Path exportDir = getOutputPath(job);
  if (exportDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  TokenCache.obtainTokensForNamenodes(
      job.getCredentials(), new Path[] {exportDir}, job.getConfiguration());

  // Intentionally no existence check here: exporting into a directory that
  // is already present is allowed.
}
@Override public void checkOutputSpecs(FileSystem ignored, JobConf job) throws FileAlreadyExistsException, InvalidJobConfException, IOException { // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null && job.getNumReduceTasks() != 0) { throw new InvalidJobConfException("Output directory not set in JobConf."); } if (outDir != null) { FileSystem fs = outDir.getFileSystem(job); // normalize the output directory outDir = fs.makeQualified(outDir); setOutputPath(job, outDir); // get delegation token for the outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job); String jobUuid = job.get("zephyr.job.uuid"); if (jobUuid == null) throw new InvalidJobConfException("This output format REQUIRES the value zephyr.job.uuid to be specified in the job configuration!"); // // check its existence // if (fs.exists(outDir)) { // throw new FileAlreadyExistsException("Output directory " + outDir // + " already exists"); // } } }
/**
 * Runs the parent class's output checks and then rejects record-level
 * compression.
 *
 * @param job the job whose output configuration is checked
 * @throws InvalidJobConfException if output compression is enabled with
 *     {@code CompressionType.RECORD}
 * @throws IOException if the parent check fails
 */
@Override
public void checkOutputSpecs(JobContext job) throws IOException {
  super.checkOutputSpecs(job);

  if (!getCompressOutput(job)) {
    return;
  }
  if (getOutputCompressionType(job) == CompressionType.RECORD) {
    throw new InvalidJobConfException("SequenceFileAsBinaryOutputFormat "
        + "doesn't support Record Compression");
  }
}
@Override public void checkOutputSpecs(JobContext job ) throws InvalidJobConfException, IOException { // Ensure that the output directory is set Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set in JobConf."); } final Configuration jobConf = job.getConfiguration(); // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf); final FileSystem fs = outDir.getFileSystem(jobConf); if (fs.exists(outDir)) { // existing output dir is considered empty iff its only content is the // partition file. // final FileStatus[] outDirKids = fs.listStatus(outDir); boolean empty = false; if (outDirKids != null && outDirKids.length == 1) { final FileStatus st = outDirKids[0]; final String fname = st.getPath().getName(); empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname); } if (TeraSort.getUseSimplePartitioner(job) || !empty) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } } }
/**
 * Verifies that the job configuration names an output table.
 *
 * <p>A missing or blank value for {@code OUTPUT_TABLE} is rejected. The
 * failure is reported as an {@link InvalidJobConfException}, which is already
 * part of this method's declared contract.
 *
 * @param ignored unused
 * @param job the job configuration to validate
 * @throws InvalidJobConfException if the output table name is missing or blank
 * @throws IOException declared for compatibility with the overridden method
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  String tableName = job.get(OUTPUT_TABLE);
  // A blank name is no more usable than a missing one, so reject both up front.
  if (tableName == null || tableName.trim().isEmpty()) {
    throw new InvalidJobConfException("Must specify table name");
  }
}
/**
 * Registers a cache archive and a cache file whose URIs share the fragment
 * {@code something}, then calls {@code MRApps.setupDistributedCache}. The
 * test expects an {@code InvalidJobConfException}.
 */
@SuppressWarnings("deprecation")
@Test(timeout = 120000, expected = InvalidJobConfException.class)
public void testSetupDistributedCacheConflicts() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class);

  URI mockUri = URI.create("mockfs://mock/");
  FilterFileSystem filterFs = (FilterFileSystem) FileSystem.get(mockUri, conf);
  FileSystem mockFs = filterFs.getRawFileSystem();

  // Archive and file resolve to different paths but use the same link name.
  URI archiveUri = new URI("mockfs://mock/tmp/something.zip#something");
  Path archivePath = new Path(archiveUri);
  URI fileUri = new URI("mockfs://mock/tmp/something.txt#something");
  Path filePath = new Path(fileUri);

  when(mockFs.resolvePath(archivePath)).thenReturn(archivePath);
  when(mockFs.resolvePath(filePath)).thenReturn(filePath);

  DistributedCache.addCacheArchive(archiveUri, conf);
  conf.set(MRJobConfig.CACHE_ARCHIVES_TIMESTAMPS, "10");
  conf.set(MRJobConfig.CACHE_ARCHIVES_SIZES, "10");
  conf.set(MRJobConfig.CACHE_ARCHIVES_VISIBILITIES, "true");

  DistributedCache.addCacheFile(fileUri, conf);
  conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "11");
  conf.set(MRJobConfig.CACHE_FILES_SIZES, "11");
  conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "true");

  Map<String, LocalResource> resources =
      new HashMap<String, LocalResource>();
  MRApps.setupDistributedCache(conf, resources);
}
/**
 * Registers two cache files whose URIs share the fragment {@code something},
 * then calls {@code MRApps.setupDistributedCache}. The test expects an
 * {@code InvalidJobConfException}.
 */
@SuppressWarnings("deprecation")
@Test(timeout = 120000, expected = InvalidJobConfException.class)
public void testSetupDistributedCacheConflictsFiles() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass("fs.mockfs.impl", MockFileSystem.class, FileSystem.class);

  URI mockUri = URI.create("mockfs://mock/");
  FilterFileSystem filterFs = (FilterFileSystem) FileSystem.get(mockUri, conf);
  FileSystem mockFs = filterFs.getRawFileSystem();

  // Two distinct files that use the same link name.
  URI firstUri = new URI("mockfs://mock/tmp/something.zip#something");
  Path firstPath = new Path(firstUri);
  URI secondUri = new URI("mockfs://mock/tmp/something.txt#something");
  Path secondPath = new Path(secondUri);

  when(mockFs.resolvePath(firstPath)).thenReturn(firstPath);
  when(mockFs.resolvePath(secondPath)).thenReturn(secondPath);

  DistributedCache.addCacheFile(firstUri, conf);
  DistributedCache.addCacheFile(secondUri, conf);
  conf.set(MRJobConfig.CACHE_FILE_TIMESTAMPS, "10,11");
  conf.set(MRJobConfig.CACHE_FILES_SIZES, "10,11");
  conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "true,true");

  Map<String, LocalResource> resources =
      new HashMap<String, LocalResource>();
  MRApps.setupDistributedCache(conf, resources);
}
@Override public void checkOutputSpecs(JobContext job ) throws InvalidJobConfException, IOException { // Ensure that the output directory is set Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set in JobConf."); } // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration()); }
/**
 * Verifies the output location of a parse job.
 *
 * <p>A job with reduce tasks must have an output directory. When an output
 * directory is configured, the job is rejected if that directory already
 * contains {@code CrawlDatum.PARSE_DIR_NAME}.
 *
 * @param fs file system to query; when {@code null} it is derived from the
 *     output path
 * @param job the job configuration to validate
 * @throws InvalidJobConfException if the output directory is missing for a job
 *     with reduce tasks
 * @throws IOException if the segment already holds parse output, or the file
 *     system cannot be reached
 */
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if (out == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException(
          "Output directory not set in JobConf.");
    }
    // No output path on a job without reducers: there is nothing to inspect,
    // and dereferencing the null path below would fail.
    return;
  }

  if (fs == null) {
    fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME))) {
    throw new IOException("Segment already parsed!");
  }
}
/**
 * Verifies the output location of a fetch job.
 *
 * <p>A job with reduce tasks must have an output directory. When an output
 * directory is configured, the job is rejected if that directory already
 * contains {@code CrawlDatum.FETCH_DIR_NAME}.
 *
 * @param fs file system to query; when {@code null} it is derived from the
 *     output path
 * @param job the job configuration to validate
 * @throws InvalidJobConfException if the output directory is missing for a job
 *     with reduce tasks
 * @throws IOException if the segment already holds fetch output, or the file
 *     system cannot be reached
 */
public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
  Path out = FileOutputFormat.getOutputPath(job);
  if (out == null) {
    if (job.getNumReduceTasks() != 0) {
      throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    // No output path on a job without reducers: there is nothing to inspect,
    // and dereferencing the null path below would fail.
    return;
  }

  if (fs == null) {
    fs = out.getFileSystem(job);
  }
  if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME))) {
    throw new IOException("Segment already fetched!");
  }
}
/**
 * Creates a record writer from the job configuration.
 *
 * <p>Reads the output table name, then builds the batch executor from the
 * driver, the output URL and the batch size (falling back to
 * {@code OUTPUT_BATCH_SIZE_DEFAULT}).
 *
 * @param conf the job configuration
 * @throws InvalidJobConfException if resolving the driver raises
 *     {@code ClassNotFoundException}
 * @throws IOException declared by the constructor
 */
public MapRedGfxdRecordWriter(Configuration conf) throws IOException {
  this.tableName = conf.get(OUTPUT_TABLE);

  try {
    this.batchExecutor = util.new RowCommandBatchExecutor(
        getDriver(conf),
        conf.get(OUTPUT_URL),
        conf.getInt(OUTPUT_BATCH_SIZE, OUTPUT_BATCH_SIZE_DEFAULT));
  } catch (ClassNotFoundException missingDriver) {
    logger.error("Gemfirexd client classes are missing from the classpath",
        missingDriver);
    throw new InvalidJobConfException(missingDriver);
  }
}
/** * Validates correctness and completeness of job's output configuration. Job * configuration must contain url, table and schema name * * @param conf * job conf * @throws InvalidJobConfException */ protected static void validateConfiguration(Configuration conf) throws InvalidJobConfException { // User must configure the output region name. String url = conf.get(OUTPUT_URL); if (url == null || url.trim().isEmpty()) { throw new InvalidJobConfException("Output URL not configured."); } String table = conf.get(OUTPUT_TABLE); if (table == null || table.trim().isEmpty()) { throw new InvalidJobConfException("Output table name not provided."); } }
/**
 * Creates a record writer from the job configuration.
 *
 * <p>Reads the output table name, then builds the batch executor from the
 * driver, the output URL and the batch size (falling back to
 * {@code OUTPUT_BATCH_SIZE_DEFAULT}).
 *
 * @param conf the job configuration
 * @throws InvalidJobConfException if resolving the driver raises
 *     {@code ClassNotFoundException}
 * @throws IOException declared by the constructor
 */
public GfxdRecordWriter(Configuration conf) throws IOException {
  this.tableName = conf.get(OUTPUT_TABLE);

  try {
    this.batchExecutor = util.new RowCommandBatchExecutor(
        getDriver(conf),
        conf.get(OUTPUT_URL),
        conf.getInt(OUTPUT_BATCH_SIZE, OUTPUT_BATCH_SIZE_DEFAULT));
  } catch (ClassNotFoundException missingDriver) {
    logger.error("Gemfirexd client classes are missing from the classpath",
        missingDriver);
    throw new InvalidJobConfException(missingDriver);
  }
}
/** * Validates correctness and completeness of job's output configuration * * @param conf * @throws InvalidJobConfException */ protected void validateConfiguration(Configuration conf) throws InvalidJobConfException { // User must configure the output region name. String region = conf.get(REGION); if (region == null || region.trim().isEmpty()) { throw new InvalidJobConfException("Output Region name not provided."); } // TODO validate if a client connected to gemfire cluster can be created }
/**
 * Verifies that the job configuration names an output table.
 *
 * <p>A missing or blank value for {@code OUTPUT_TABLE} is rejected. The
 * failure is reported as an {@link InvalidJobConfException}, which is already
 * part of this method's declared contract.
 *
 * @param ignored unused
 * @param job the job configuration to validate
 * @throws InvalidJobConfException if the output table name is missing or blank
 * @throws IOException declared for compatibility with the overridden method
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  String tableName = job.get(OUTPUT_TABLE);
  // A blank name is no more usable than a missing one, so reject both up front.
  if (tableName == null || tableName.trim().isEmpty()) {
    throw new InvalidJobConfException("Must specify table name");
  }
}
/**
 * This is part of the framework API. It's called within the job
 * submission code only, not by users. In the non-error case it has
 * no side effects and returns normally. If there's a URI in both
 * mapred.cache.files and mapred.cache.archives, it throws its
 * exception.
 *
 * @param conf a {@link Configuration} to be checked for duplication
 *             in cached URIs
 * @throws InvalidJobConfException if a cached file and a cached archive
 *             share the same core location
 **/
public static void validate(Configuration conf)
    throws InvalidJobConfException {
  final String[] archiveStrings =
      conf.getStrings(DistributedCache.CACHE_ARCHIVES);
  final String[] fileStrings =
      conf.getStrings(DistributedCache.CACHE_FILES);

  // A clash needs entries in both lists.
  if (archiveStrings == null || fileStrings == null) {
    return;
  }

  final Set<Path> archiveLocations = new HashSet<Path>();
  for (String archiveString : archiveStrings) {
    archiveLocations.add(coreLocation(archiveString, conf));
  }

  for (String fileString : fileStrings) {
    final Path fileLocation = coreLocation(fileString, conf);
    if (archiveLocations.contains(fileLocation)) {
      throw new InvalidJobConfException
        ("The core URI, \"" + fileLocation
         + "\" is listed both in " + DistributedCache.CACHE_FILES
         + " and in " + DistributedCache.CACHE_ARCHIVES + " .");
    }
  }
}
public void checkOutputSpecs(JobContext job ) throws FileAlreadyExistsException, IOException{ // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set."); } if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } }