Java 类org.apache.hadoop.mapreduce.lib.input.InvalidInputException 实例源码

项目:components    文件:UgiExceptionHandler.java   
/**
 * Converts a runtime exception into a more specific one based on the type of its cause.
 *
 * <p>The cause is checked, in order, against {@code AccessControlException},
 * {@code LoginException}, {@code InvalidInputException} and finally the general
 * {@code IOException}. For the last case the conversion may decline by returning
 * {@code null}, in which case the original exception is handed back.
 *
 * @param e the exception whose cause is inspected
 * @return the converted exception, or {@code e} itself when no conversion applies
 */
private RuntimeException adapt(RuntimeException e) {
    final Throwable cause = e.getCause();

    if (cause instanceof AccessControlException) {
        return createFrom((AccessControlException) cause);
    }
    if (cause instanceof LoginException) {
        return createFrom((LoginException) cause);
    }
    if (cause instanceof InvalidInputException) {
        return createFrom((InvalidInputException) cause);
    }
    if (cause instanceof IOException) {
        final RuntimeException converted = createFromOrNull((IOException) cause);
        if (converted != null) {
            return converted;
        }
    }
    // Nothing recognised: propagate the caller's exception untouched.
    return e;
}
项目:angel    文件:BalanceInputFormat.java   
/**
 * Lists the entries found under the single input location configured through
 * {@code INPUT_DIR}.
 *
 * <p>The configured path is expanded as a glob. Every matched directory is listed one level
 * deep; when {@code INPUT_DIR_RECURSIVE} is enabled, sub-directories are handed to
 * {@code addInputPathRecursively}, otherwise they are added to the result as-is. Matched
 * non-directory entries are added directly.
 *
 * @param conf configuration supplying the input directory and the recursive flag
 * @return the statuses collected from the input location
 * @throws IOException if no input directory is configured
 * @throws InvalidInputException if the path does not exist or the pattern matches nothing
 */
protected List<FileStatus> listStatus0(Configuration conf) throws IOException {
  // Validate the raw setting before building the Path: an empty string cannot be turned
  // into a Path, and a length check on a fixed-size array can never detect a missing input.
  String inputDir = StringUtils.unEscapeString(conf.get(INPUT_DIR, ""));
  if (inputDir == null || inputDir.isEmpty()) {
    throw new IOException("No input paths specified in job");
  }
  Path[] dirs = new Path[] {new Path(inputDir)};

  List<FileStatus> result = new ArrayList<FileStatus>();

  // get tokens for all the required FileSystems..
  //    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
  //      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = conf.getBoolean(INPUT_DIR_RECURSIVE, false);

  List<IOException> errors = new ArrayList<IOException>();

  // NOTE(review): the filter list is left empty here, so presumably nothing (not even
  // hidden files) is filtered out -- confirm this is intended.
  List<PathFilter> filters = new ArrayList<PathFilter>();

  PathFilter inputFilter = new MultiPathFilter(filters);

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(conf);
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                  inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  // Problems are reported together once every path has been examined.
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }

  return result;
}
项目:marklogic-contentpump    文件:FileAndDirectoryInputFormat.java   
/**
 * Lists the entries matched by the given input paths.
 *
 * <p>Each path is expanded as a glob. Matched directories are listed one level deep with
 * {@code inputFilter}; when {@code recursive} is set, sub-directories are handed to
 * {@code simpleAddInputPathRecursively}, otherwise they are added as-is. Matched
 * non-directory entries are added directly. Problems are collected across all paths and
 * reported together at the end.
 *
 * @param job the job whose configuration is used to resolve file systems
 * @param dirs the input paths (may contain glob patterns)
 * @param inputFilter filter applied to glob matches and directory listings
 * @param recursive whether sub-directories should be descended into
 * @return the collected statuses
 * @throws IOException if a file system operation fails
 * @throws InvalidInputException if any path is invalid, missing, or matches nothing
 */
private List<FileStatus> simpleListStatus(JobContext job, Path[] dirs,
        PathFilter inputFilter, boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    Configuration conf = job.getConfiguration();
    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(conf);
        FileStatus[] matches;
        try {
            matches = fs.globStatus(p, inputFilter);
        } catch (IllegalArgumentException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            errors.add(new IOException(e.getMessage(), e));
            continue;
        }
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat: matches) {
                if (globStat.isDirectory()) {
                    FileStatus[] files = fs.listStatus(globStat.getPath(), inputFilter);
                    for (FileStatus file : files) {
                        if (recursive && file.isDirectory()) {
                            simpleAddInputPathRecursively(result, fs, file.getPath(), inputFilter);
                        } else {
                            result.add(file);
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}
项目:reair    文件:DirScanInputFormat.java   
/**
 * Collects the directories matched by the job's input paths.
 *
 * <p>Each input path is expanded as a glob, using {@code hiddenFileFilter} unless
 * {@code NO_HIDDEN_FILE_FILTER} is set. Only matches that are directories are kept, and a
 * directory is skipped when its name matches the regex configured under
 * {@code ReplicationJob.DIRECTORY_BLACKLIST_REGEX}.
 *
 * @param job the job supplying the configuration and input paths
 * @return the matched directories that are not blacklisted
 * @throws IOException if the job has no input paths
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
private List<FileStatus> getInitialSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  String directoryBlackList = conf.get(ReplicationJob.DIRECTORY_BLACKLIST_REGEX);
  boolean nofilter = conf.getBoolean(NO_HIDDEN_FILE_FILTER, false);

  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // Typed collections avoid the unchecked conversions the raw lists caused.
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();

  for (Path path : dirs) {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] matches = nofilter ? fs.globStatus(path)
                                    : fs.globStatus(path, hiddenFileFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + path));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + path + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        // Non-directory matches are intentionally ignored here.
        if (globStat.isDirectory()) {
          if (directoryBlackList == null
              || !globStat.getPath().getName().matches(directoryBlackList)) {
            result.add(globStat);
          }
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input directory to process : " + result.size());
  return result;
}
项目:components    文件:UgiExceptionHandler.java   
/**
 * Maps an {@code InvalidInputException} to a "not authorized" error: the input variant when
 * the access type is {@code AccessType.Read}, the output variant otherwise.
 *
 * @param cause the exception being translated
 * @return the error built for the current access type, user name and path
 */
private TalendRuntimeException createFrom(InvalidInputException cause) {
    // TODO: more specific method for file not found errors.
    return accessType == AccessType.Read
            ? SimpleFileIOErrorCode.createInputNotAuthorized(cause, username, path)
            : SimpleFileIOErrorCode.createOutputNotAuthorized(cause, username, path);
}
项目:guagua    文件:GuaguaMRUnitDriver.java   
/**
 * Lists the files found under the given input paths.
 * Subclasses may override to, e.g., select only files matching a regular expression.
 *
 * <p>Each path is expanded as a glob with the hidden-file filter applied. Matched
 * directories are listed one level deep with the same filter; other matches are added
 * directly.
 *
 * @param conf
 *            configuration used to resolve the file system of each path
 * @param input
 *            input specification passed to {@code getInputPaths}
 * @return list of FileStatus objects
 * @throws IOException
 *             if no input paths are given.
 * @throws InvalidInputException
 *             if any input path does not exist or matches no files.
 */
@SuppressWarnings("deprecation")
protected List<FileStatus> listStatus(Configuration conf, String input) throws IOException {
    Path[] inputPaths = getInputPaths(input);
    if(inputPaths.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Only the hidden-file filter is registered; it is wrapped in a MultiPathFilter.
    List<PathFilter> filterList = new ArrayList<PathFilter>();
    filterList.add(hiddenFileFilter);
    PathFilter visibleOnly = new MultiPathFilter(filterList);

    List<FileStatus> files = new ArrayList<FileStatus>();
    List<IOException> problems = new ArrayList<IOException>();

    for(Path inputPath: inputPaths) {
        FileSystem fs = inputPath.getFileSystem(conf);
        FileStatus[] globbed = fs.globStatus(inputPath, visibleOnly);
        if(globbed == null) {
            problems.add(new IOException("Input path does not exist: " + inputPath));
            continue;
        }
        if(globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + inputPath + " matches 0 files"));
            continue;
        }
        for(FileStatus match: globbed) {
            if(!match.isDir()) {
                files.add(match);
                continue;
            }
            FileStatus[] children = fs.listStatus(match.getPath(), visibleOnly);
            for(int c = 0; c < children.length; c++) {
                files.add(children[c]);
            }
        }
    }

    // All problems are reported together after every path has been examined.
    if(!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    return files;
}
项目:spork-streaming    文件:TableInputFormat.java   
/**
 * Copied from FileInputFormat, with one addition: the number of files contributed by each
 * glob match is recorded in {@code fileNumbers}.
 *
 * @param jobContext the job supplying the configuration, input paths and optional filter
 * @return the statuses of all input files
 * @throws IOException if the job has no input paths
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
@Override
public List<FileStatus> listStatus(JobContext jobContext) throws IOException {
  Configuration job = jobContext.getConfiguration();
  Path[] inputDirs = getInputPaths(jobContext);
  if (inputDirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // Combine the hidden-file filter with the job's own filter, when one is set.
  List<PathFilter> pathFilters = new ArrayList<PathFilter>();
  pathFilters.add(hiddenFileFilter);
  PathFilter userFilter = getInputPathFilter(jobContext);
  if (userFilter != null) {
    pathFilters.add(userFilter);
  }
  PathFilter combinedFilter = new MultiPathFilter(pathFilters);

  List<FileStatus> collected = new ArrayList<FileStatus>();
  List<IOException> problems = new ArrayList<IOException>();
  List<Integer> counts = new ArrayList<Integer>();

  // The position of the input path selects the reader used for reordering.
  for (int dirIdx = 0; dirIdx < inputDirs.length; dirIdx++) {
    Path dir = inputDirs[dirIdx];
    FileSystem fs = dir.getFileSystem(job);
    FileStatus[] globbed = fs.globStatus(dir, combinedFilter);
    if (globbed == null) {
      problems.add(new IOException("Input path does not exist: " + dir));
      continue;
    }
    if (globbed.length == 0) {
      problems.add(new IOException("Input Pattern " + dir + " matches 0 files"));
      continue;
    }
    for (FileStatus match : globbed) {
      if (!match.isDir()) {
        collected.add(match);
        counts.add(1);
        continue;
      }
      FileStatus[] children = fs.listStatus(match.getPath(), combinedFilter);
      // reorder according to CG index
      BasicTable.Reader reader = readers.get(dirIdx);
      if (children.length > 1) {
        reader.rearrangeFileIndices(children);
      }
      for (FileStatus child : children) {
        if (child != null) {
          collected.add(child);
        }
      }
      // The recorded count is the array length, including any null slots.
      counts.add(children.length);
    }
  }

  // fileNumbers is assigned before the error check, so it is updated even on failure.
  fileNumbers = counts.toArray(new Integer[counts.size()]);

  if (!problems.isEmpty()) {
    throw new InvalidInputException(problems);
  }
  LOG.info("Total input paths to process : " + collected.size());
  return collected;
}
项目:spork    文件:TestSkewedJoin.java   
/**
 * Runs a skewed join whose second input does not exist. If the run fails, the failure must
 * be attributable to an {@code InvalidInputException}, found either in the exception's
 * cause chain or in the captured Pig log.
 */
@Test
public void testNonExistingInputPathInSkewJoin() throws Exception {
    String script =
      "exists = LOAD '" + INPUT_FILE2 + "' AS (a:long, x:chararray);" +
      "missing = LOAD '/non/existing/directory' AS (a:long);" +
      "missing = FOREACH ( GROUP missing BY a ) GENERATE $0 AS a, COUNT_STAR($1);" +
      "joined = JOIN exists BY a, missing BY a USING 'skewed';";

    // Capture Pig's log output in a temp file so it can be searched afterwards.
    String logFile = Util.createTempFileDelOnExit("tmp", ".log").getAbsolutePath();
    Logger logger = Logger.getLogger("org.apache.pig");
    logger.setLevel(Level.INFO);
    SimpleLayout layout = new SimpleLayout();
    FileAppender appender = new FileAppender(layout, logFile, false, false, 0);
    logger.addAppender(appender);

    try {
        pigServer.registerScript(new ByteArrayInputStream(script.getBytes("UTF-8")));
        pigServer.openIterator("joined");
    } catch (Exception e) {
        boolean foundInvalidInputException = false;

        // Search through chained exceptions for InvalidInputException. If
        // input splits are calculated on the front-end, we will see this
        // exception in the stack trace.
        Throwable cause = e.getCause();
        while (cause != null) {
            if (cause instanceof InvalidInputException) {
                foundInvalidInputException = true;
                break;
            }
            cause = cause.getCause();
        }

        // InvalidInputException was not found in the stack trace. But it's
        // possible that the exception was thrown in the back-end, and Pig
        // couldn't retrieve it in the front-end. To be safe, search the log
        // file before declaring a failure.
        if (!foundInvalidInputException) {
            File log = new File(logFile);
            byte[] buffer = new byte[(int) log.length()];
            FileInputStream fis = new FileInputStream(log);
            try {
                // A single read() may return fewer bytes than requested, so keep
                // reading until the buffer is full or the stream ends.
                int filled = 0;
                while (filled < buffer.length) {
                    int n = fis.read(buffer, filled, buffer.length - filled);
                    if (n < 0) {
                        break;
                    }
                    filled += n;
                }
                String str = new String(buffer, 0, filled, "UTF-8");
                if (str.contains(InvalidInputException.class.getName())) {
                    foundInvalidInputException = true;
                }
            } finally {
                // Close the stream even when reading or decoding fails.
                fis.close();
            }
        }

        assertTrue("This exception was not caused by InvalidInputException: " + e,
                foundInvalidInputException);
    } finally {
        LogManager.resetConfiguration();
    }
}
项目:giraph-gora    文件:GiraphFileInputFormat.java   
/**
   * Common method for listing vertex/edge input directories.
   *
   * <p>Each path is expanded as a glob using the hidden-file filter combined with the
   * job's own filter (if any). Matched directories are listed one level deep with the
   * same filter, so hidden entries inside them are excluded as well; other matches are
   * added directly.
   *
   * @param job The job
   * @param dirs list of vertex/edge input paths
   * @return List of FileStatus objects
   * @throws IOException if no input paths are given
   * @throws InvalidInputException if any path does not exist or matches nothing
   */
  private List<FileStatus> listStatus(JobContext job, Path[] dirs)
    throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    if (dirs.length == 0) {
      throw new IOException("No input paths specified in job");
    }

/*if[HADOOP_NON_SECURE]
else[HADOOP_NON_SECURE]
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
        job.getConfiguration());
end[HADOOP_NON_SECURE]*/

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
      filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
      FileSystem fs = p.getFileSystem(job.getConfiguration());
      FileStatus[] matches = fs.globStatus(p, inputFilter);
      if (matches == null) {
        errors.add(new IOException("Input path does not exist: " + p));
      } else if (matches.length == 0) {
        errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      } else {
        for (FileStatus globStat: matches) {
          if (globStat.isDir()) {
            // Apply the filter to the directory contents too; filtering only the glob
            // would let hidden entries inside the directory through as input.
            Collections.addAll(result,
                fs.listStatus(globStat.getPath(), inputFilter));
          } else {
            result.add(globStat);
          }
        }
      }
    }

    if (!errors.isEmpty()) {
      throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
  }
项目:giraph-research    文件:GiraphFileInputFormat.java   
/**
 * Common method for listing vertex/edge input directories.
 *
 * <p>Each path is expanded as a glob using the hidden-file filter combined with the job's
 * own filter (if any). Matched directories are listed one level deep with the same
 * filter, so hidden entries inside them are excluded as well; other matches are added
 * directly.
 *
 * @param job The job
 * @param dirs list of vertex/edge input paths
 * @return List of FileStatus objects
 * @throws IOException if no input paths are given
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
private List<FileStatus> listStatus(JobContext job, Path[] dirs)
  throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
      job.getConfiguration());

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(HIDDEN_FILE_FILTER);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDir()) {
          // Apply the filter to the directory contents too; filtering only the glob
          // would let hidden entries inside the directory through as input.
          Collections.addAll(result,
              fs.listStatus(globStat.getPath(), inputFilter));
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
项目:giraph-research    文件:GiraphFileInputFormat.java   
/**
 * Common method for listing vertex/edge input directories.
 *
 * <p>Input paths are glob-expanded with a filter that combines the hidden-file filter and
 * the job's optional filter. The same filter is used when listing the contents of a
 * matched directory, so hidden entries inside it are not returned. Matches that are not
 * directories are returned unchanged.
 *
 * @param job The job
 * @param dirs list of vertex/edge input paths
 * @return List of FileStatus objects
 * @throws IOException if no input paths are given
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
private List<FileStatus> listStatus(JobContext job, Path[] dirs)
  throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
      job.getConfiguration());

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(HIDDEN_FILE_FILTER);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDir()) {
          // The glob filter alone does not cover directory contents, so pass it to
          // the listing as well to keep hidden entries out of the input.
          Collections.addAll(result,
              fs.listStatus(globStat.getPath(), inputFilter));
        } else {
          result.add(globStat);
        }
      }
    }
  }

  // Errors for all paths are reported together.
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
项目:giraph-research    文件:GiraphFileInputFormat.java   
/**
   * Common method for listing vertex/edge input directories.
   *
   * <p>Input paths are glob-expanded with a filter that combines the hidden-file filter
   * and the job's optional filter. The same filter is used when listing the contents of
   * a matched directory, so hidden entries inside it are not returned. Matches that are
   * not directories are returned unchanged.
   *
   * @param job The job
   * @param dirs list of vertex/edge input paths
   * @return List of FileStatus objects
   * @throws IOException if no input paths are given
   * @throws InvalidInputException if any path does not exist or matches nothing
   */
  private List<FileStatus> listStatus(JobContext job, Path[] dirs)
    throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    if (dirs.length == 0) {
      throw new IOException("No input paths specified in job");
    }

/*if[HADOOP_NON_SECURE]
else[HADOOP_NON_SECURE]
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
        job.getConfiguration());
end[HADOOP_NON_SECURE]*/

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the HIDDEN_FILE_FILTER and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(HIDDEN_FILE_FILTER);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
      filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
      FileSystem fs = p.getFileSystem(job.getConfiguration());
      FileStatus[] matches = fs.globStatus(p, inputFilter);
      if (matches == null) {
        errors.add(new IOException("Input path does not exist: " + p));
      } else if (matches.length == 0) {
        errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      } else {
        for (FileStatus globStat: matches) {
          if (globStat.isDir()) {
            // The glob filter alone does not cover directory contents, so pass it to
            // the listing as well to keep hidden entries out of the input.
            Collections.addAll(result,
                fs.listStatus(globStat.getPath(), inputFilter));
          } else {
            result.add(globStat);
          }
        }
      }
    }

    // Errors for all paths are reported together.
    if (!errors.isEmpty()) {
      throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
  }
项目:sedge    文件:TableInputFormat.java   
/**
 * Variant of FileInputFormat's listing that additionally stores, in {@code fileNumbers},
 * how many files each glob match contributed.
 *
 * @param jobContext the job supplying the configuration, input paths and optional filter
 * @return the statuses of all input files
 * @throws IOException if the job has no input paths
 * @throws InvalidInputException if any path does not exist or matches nothing
 */
@Override
public List<FileStatus> listStatus(JobContext jobContext) throws IOException {
  Configuration job = jobContext.getConfiguration();
  Path[] dirs = getInputPaths(jobContext);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<FileStatus> statuses = new ArrayList<FileStatus>();
  List<IOException> failures = new ArrayList<IOException>();

  // Hidden-file filter first, then the job-level filter if the job defines one.
  List<PathFilter> chain = new ArrayList<PathFilter>();
  chain.add(hiddenFileFilter);
  PathFilter fromJob = getInputPathFilter(jobContext);
  if (fromJob != null) {
    chain.add(fromJob);
  }
  PathFilter inputFilter = new MultiPathFilter(chain);

  List<Integer> perMatchCounts = new ArrayList<Integer>();
  int readerIndex = 0;
  for (Path dir : dirs) {
    FileSystem fs = dir.getFileSystem(job);
    FileStatus[] matches = fs.globStatus(dir, inputFilter);
    if (matches == null) {
      failures.add(new IOException("Input path does not exist: " + dir));
    } else if (matches.length == 0) {
      failures.add(new IOException("Input Pattern " + dir + " matches 0 files"));
    } else {
      for (FileStatus match : matches) {
        if (match.isDir()) {
          FileStatus[] listed = fs.listStatus(match.getPath(), inputFilter);
          // reorder according to CG index
          BasicTable.Reader reader = readers.get(readerIndex);
          if (listed.length > 1) {
            reader.rearrangeFileIndices(listed);
          }
          for (int k = 0; k < listed.length; k++) {
            if (listed[k] != null) {
              statuses.add(listed[k]);
            }
          }
          // Count is the full array length, null slots included.
          perMatchCounts.add(listed.length);
        } else {
          statuses.add(match);
          perMatchCounts.add(1);
        }
      }
    }
    // Advances once per input path, whether or not the path produced matches.
    readerIndex++;
  }

  // Stored before the error check, so the field is refreshed even when listing fails.
  fileNumbers = perMatchCounts.toArray(new Integer[perMatchCounts.size()]);

  if (!failures.isEmpty()) {
    throw new InvalidInputException(failures);
  }
  LOG.info("Total input paths to process : " + statuses.size());
  return statuses;
}