/**
 * Maps a runtime exception onto a more specific one derived from its direct cause.
 *
 * <p>The cause is checked against the known types in a fixed order (access control, login,
 * invalid input, then generic I/O). For a generic {@link IOException} the conversion may
 * yield {@code null}, in which case the original exception is kept.
 *
 * @param e the exception to adapt
 * @return the converted exception, or {@code e} itself when its cause is not recognized
 */
private RuntimeException adapt(RuntimeException e) {
  final Throwable cause = e.getCause();

  if (cause instanceof AccessControlException) {
    return createFrom((AccessControlException) cause);
  }
  if (cause instanceof LoginException) {
    return createFrom((LoginException) cause);
  }
  // Must be tested before the generic IOException branch below.
  if (cause instanceof InvalidInputException) {
    return createFrom((InvalidInputException) cause);
  }
  if (cause instanceof IOException) {
    RuntimeException converted = createFromOrNull((IOException) cause);
    if (converted != null) {
      return converted;
    }
  }
  // Nothing more specific is available: hand back the original exception untouched.
  return e;
}
protected List<FileStatus> listStatus0(Configuration conf) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = new Path[1]; dirs[0] = new Path(StringUtils.unEscapeString(conf.get(INPUT_DIR, ""))); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. // TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, // job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = conf.getBoolean(INPUT_DIR_RECURSIVE, false); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); PathFilter inputFilter = new MultiPathFilter(filters); for (int i=0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(conf); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDirectory()) { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath()); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (recursive && stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
/**
 * Lists the input files for the given paths, one path after the other.
 *
 * <p>Each entry of {@code dirs} is expanded as a glob. Matching directories are listed one
 * level deep; with {@code recursive} set, nested directories are expanded through
 * {@code simpleAddInputPathRecursively}. Problems are collected per path so that all of them
 * are reported together.
 *
 * @param job the job whose configuration is used to resolve file systems
 * @param dirs the input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory listings
 * @param recursive whether nested directories are descended into
 * @return the statuses of all files selected as input
 * @throws IOException if a file system cannot be obtained or listed
 * @throws InvalidInputException if any path is invalid, missing, or matches nothing
 */
private List<FileStatus> simpleListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  Configuration conf = job.getConfiguration();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(conf);
    FileStatus[] matches;
    try {
      matches = fs.globStatus(p, inputFilter);
    } catch (IllegalArgumentException e) {
      // Same message as before, but keep the original exception as the cause so the stack
      // trace of the underlying failure is not lost.
      errors.add(new IOException(e.getMessage(), e));
      continue;
    }

    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (!globStat.isDirectory()) {
          result.add(globStat);
          continue;
        }
        for (FileStatus file : fs.listStatus(globStat.getPath(), inputFilter)) {
          if (recursive && file.isDirectory()) {
            simpleAddInputPathRecursively(result, fs, file.getPath(), inputFilter);
          } else {
            result.add(file);
          }
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
/**
 * Collects the input directories that form the initial splits of the job.
 *
 * <p>Every input path is expanded as a glob (hidden files are filtered out unless
 * {@code NO_HIDDEN_FILE_FILTER} is set). Only matches that are directories are kept, and a
 * directory is dropped when its name matches the regular expression configured under
 * {@code ReplicationJob.DIRECTORY_BLACKLIST_REGEX}.
 *
 * @param job the job providing the configuration and the input paths
 * @return the statuses of the selected input directories
 * @throws IOException if the job has no input paths
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
private List<FileStatus> getInitialSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  String directoryBlackList = conf.get(ReplicationJob.DIRECTORY_BLACKLIST_REGEX);
  boolean nofilter = conf.getBoolean(NO_HIDDEN_FILE_FILTER, false);

  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // Parameterized collections instead of raw types: the result is returned as
  // List<FileStatus> and the errors are handed to InvalidInputException.
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();

  for (Path path : dirs) {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] matches =
        nofilter ? fs.globStatus(path) : fs.globStatus(path, hiddenFileFilter);

    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + path));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + path + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        // Plain files are ignored on purpose: only directories are collected here.
        if (!globStat.isDirectory()) {
          continue;
        }
        if (directoryBlackList == null
            || !globStat.getPath().getName().matches(directoryBlackList)) {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input directory to process : " + result.size());
  return result;
}
private TalendRuntimeException createFrom(InvalidInputException cause) { // TODO: more specific method for file not found errors. if (accessType == AccessType.Read) return SimpleFileIOErrorCode.createInputNotAuthorized(cause, username, path); return SimpleFileIOErrorCode.createOutputNotAuthorized(cause, username, path); }
/** * List input directories. * Subclasses may override to, e.g., select only files matching a regular expression. * * @param job * the job to list input paths for * @return array of FileStatus objects * @throws IOException * if zero items. * @throws InvalidInputException * If any IOException for input files. */ @SuppressWarnings("deprecation") protected List<FileStatus> listStatus(Configuration conf, String input) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(input); if(dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter inputFilter = new MultiPathFilter(filters); for(int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(conf); FileStatus[] matches = fs.globStatus(p, inputFilter); if(matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if(matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for(FileStatus globStat: matches) { if(globStat.isDir()) { for(FileStatus stat: fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if(!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
/** * copy from FileInputFormat: add assignment to table file numbers */ @Override public List<FileStatus> listStatus(JobContext jobContext) throws IOException { Configuration job = jobContext.getConfiguration(); Path[] dirs = getInputPaths(jobContext); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(jobContext); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); ArrayList<Integer> fileNumberList = new ArrayList<Integer>(); int index = 0; for (Path p: dirs) { FileSystem fs = p.getFileSystem(job); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDir()) { FileStatus[] fileStatuses = fs.listStatus(globStat.getPath(), inputFilter); // reorder according to CG index BasicTable.Reader reader = readers.get(index); if (fileStatuses.length > 1) reader.rearrangeFileIndices(fileStatuses); for(FileStatus stat: fileStatuses) { if (stat != null) result.add(stat); } fileNumberList.add(fileStatuses.length); } else { result.add(globStat); fileNumberList.add(1); } } } index++; } fileNumbers = new Integer[fileNumberList.size()]; fileNumberList.toArray(fileNumbers); if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
@Test public void testNonExistingInputPathInSkewJoin() throws Exception { String script = "exists = LOAD '" + INPUT_FILE2 + "' AS (a:long, x:chararray);" + "missing = LOAD '/non/existing/directory' AS (a:long);" + "missing = FOREACH ( GROUP missing BY a ) GENERATE $0 AS a, COUNT_STAR($1);" + "joined = JOIN exists BY a, missing BY a USING 'skewed';"; String logFile = Util.createTempFileDelOnExit("tmp", ".log").getAbsolutePath(); Logger logger = Logger.getLogger("org.apache.pig"); logger.setLevel(Level.INFO); SimpleLayout layout = new SimpleLayout(); FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0); logger.addAppender(appender); try { pigServer.registerScript(new ByteArrayInputStream(script.getBytes("UTF-8"))); pigServer.openIterator("joined"); } catch (Exception e) { boolean foundInvalidInputException = false; // Search through chained exceptions for InvalidInputException. If // input splits are calculated on the front-end, we will see this // exception in the stack trace. Throwable cause = e.getCause(); while (cause != null) { if (cause instanceof InvalidInputException) { foundInvalidInputException = true; break; } cause = cause.getCause(); } // InvalidInputException was not found in the stack trace. But it's // possible that the exception was thrown in the back-end, and Pig // couldn't retrieve it in the front-end. To be safe, search the log // file before declaring a failure. if (!foundInvalidInputException) { FileInputStream fis = new FileInputStream(new File(logFile)); int bytes = fis.available(); byte[] buffer = new byte[bytes]; fis.read(buffer); String str = new String(buffer, "UTF-8"); if (str.contains(InvalidInputException.class.getName())) { foundInvalidInputException = true; } fis.close(); } assertTrue("This exception was not caused by InvalidInputException: " + e, foundInvalidInputException); } finally { LogManager.resetConfiguration(); } }
/** * Common method for listing vertex/edge input directories. * * @param job The job * @param dirs list of vertex/edge input paths * @return Array of FileStatus objects * @throws IOException */ private List<FileStatus> listStatus(JobContext job, Path[] dirs) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } /*if[HADOOP_NON_SECURE] else[HADOOP_NON_SECURE] // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); end[HADOOP_NON_SECURE]*/ List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(HIDDEN_FILE_FILTER); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDir()) { Collections.addAll(result, fs.listStatus(globStat.getPath())); } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
/** * Common method for listing vertex/edge input directories. * * @param job The job * @param dirs list of vertex/edge input paths * @return Array of FileStatus objects * @throws IOException */ private List<FileStatus> listStatus(JobContext job, Path[] dirs) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(HIDDEN_FILE_FILTER); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); for (Path p : dirs) { FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDir()) { Collections.addAll(result, fs.listStatus(globStat.getPath())); } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }