/**
 * Assert that the number of log files in the target directory is as expected.
 *
 * @param fs the target FileSystem
 * @param dir the target directory path
 * @param expected the expected number of files
 * @throws IOException thrown if listing files fails
 */
public void assertFileCount(FileSystem fs, Path dir, int expected)
    throws IOException {
  RemoteIterator<LocatedFileStatus> i = fs.listFiles(dir, true);
  int count = 0;

  while (i.hasNext()) {
    i.next();
    count++;
  }

  // Fix: the original messages were missing a space and read "Nfiles were
  // created".
  assertTrue("The sink created additional unexpected log files. " + count
      + " files were created", expected >= count);
  assertTrue("The sink created too few log files. " + count
      + " files were created", expected <= count);
}
/**
 * Return the next ID suffix to use when creating the log file. This method
 * will look at the files in the directory, find the one with the highest
 * ID suffix, add 1 to that suffix, and return it. This approach saves a full
 * linear probe, which matters in the case where there are a large number of
 * log files.
 *
 * @param initial the base file path
 * @param lastId the last ID value that was used
 * @return the next ID to try
 * @throws IOException thrown if there's an issue querying the files in the
 * directory
 */
private int getNextIdToTry(Path initial, int lastId)
    throws IOException {
  RemoteIterator<LocatedFileStatus> files =
      fileSystem.listFiles(currentDirPath, true);
  String base = initial.toString();
  int id = lastId;

  // NOTE(review): 'base' is the FULL path string of 'initial', but 'file'
  // below is only the final path component (getName()). startsWith(base) can
  // therefore only match when 'initial' has no parent directories — confirm
  // whether initial.getName() was intended here.
  while (files.hasNext()) {
    String file = files.next().getPath().getName();
    if (file.startsWith(base)) {
      int fileId = extractId(file);
      if (fileId > id) {
        id = fileId;
      }
    }
  }
  // Return either 1 more than the highest we found or 1 more than the last
  // ID used (if no ID was found).
  return id + 1;
}
/**
 * List located statuses for a view-fs path, re-rooting each returned status
 * at the chrooted path unless the target is an internal mount-table dir.
 */
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
    final PathFilter filter) throws FileNotFoundException, IOException {
  // Resolve the view-fs path to its backing file system and remaining path.
  final InodeTree.ResolveResult<FileSystem> res =
      fsState.resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> delegate =
      res.targetFileSystem.listLocatedStatus(res.remainingPath);

  // Internal (mount-table) directories need no path translation.
  if (res.isInternalDir()) {
    return delegate;
  }

  // Wrap the iterator so every returned status is fixed up on the fly.
  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return delegate.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = delegate.next();
      return (LocatedFileStatus) fixFileStatus(status,
          getChrootedPath(res, status, f));
    }
  };
}
/** Verify a recursive copy reproduces every source file in the destination. */
@Test
public void testCopyRecursive() throws Throwable {
  int expected = createTestFiles(sourceDir, 64);

  expectSuccess(
      "-s", sourceDir.toURI().toString(),
      "-d", destDir.toURI().toString(),
      "-t", "4",
      "-l", "3");

  LocalFileSystem local = FileSystem.getLocal(new Configuration());
  Set<String> found = new TreeSet<>();
  RemoteIterator<LocatedFileStatus> listing =
      local.listFiles(new Path(destDir.toURI()), true);
  int actual = 0;
  while (listing.hasNext()) {
    LocatedFileStatus status = listing.next();
    found.add(status.getPath().toUri().toString());
    LOG.info("Entry {} size = {}", status.getPath(), status.getLen());
    actual++;
  }
  assertEquals("Mismatch in files found", expected, actual);
}
/** * Get all ORC files present in directory for the specified table and partition/bucket. The ORC * files returned are in ascending order of the (insertion) time-partition and sequence-id within * the time-partition. * * @param orcDir the ORC store directory * @param args the arguments in order: table-name, bucket-id, time-partition-id * @return the list of all ORC files */ private String[] getOrcFiles(final String orcDir, final String fileExt, final String... args) { try { FileSystem fileSystem = FileSystem.get(conf); Path distributedPath = new Path(Paths.get(orcDir, args).toString()); ArrayList<String> filePathStrings = new ArrayList<>(); if (fileSystem.exists(distributedPath)) { RemoteIterator<LocatedFileStatus> fileListItr = fileSystem.listFiles(distributedPath, true); while (fileListItr != null && fileListItr.hasNext()) { LocatedFileStatus file = fileListItr.next(); if (!file.getPath().getName().endsWith(fileExt)) { // exclude CRC files filePathStrings.add(file.getPath().toUri().toString()); } } Collections.sort(filePathStrings); } String[] retArray = new String[filePathStrings.size()]; filePathStrings.toArray(retArray); return retArray; } catch (IOException e) { e.printStackTrace(); } return new String[0]; }
/**
 * Count the immediate (non-recursive) files under the given table directory
 * beneath the user's home directory. Returns 0 if the path is missing or an
 * I/O error occurs (best-effort; the error is printed).
 */
public int getFilesCount(String storeBaseDir, String tableName) {
  int count = 0;
  try {
    FileSystem fs = FileSystem.get(conf);
    Path tablePath =
        new Path(new Path(fs.getHomeDirectory(), storeBaseDir), tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> files = fs.listFiles(tablePath, false);
      while (files.hasNext()) {
        count++;
        LocatedFileStatus status = files.next();
        System.out.println("File name is " + status.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return count;
}
/**
 * Read every record from all ORC files directly under the given table
 * directory (beneath the user's home directory).
 *
 * @param storeBaseDir the store base directory under the home directory
 * @param tableName the table directory name
 * @return all ORC records found; empty list if the path is missing or an
 *         I/O error occurs (best-effort; the error is printed)
 * @throws IOException declared for callers; internal I/O errors are caught
 */
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName)
    throws IOException {
  List<OrcStruct> orcrecords = new ArrayList<>();
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        final org.apache.hadoop.hive.ql.io.orc.Reader fis =
            OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
        RecordReader rows = fis.rows();
        // Fix: close the RecordReader so reader resources are released even
        // when iteration throws.
        try {
          while (rows.hasNext()) {
            orcrecords.add((OrcStruct) rows.next(null));
          }
        } finally {
          rows.close();
        }
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return orcrecords;
}
private void deleteLocalDir(FileContext lfs, DeletionService del, String localDir) throws IOException { RemoteIterator<FileStatus> fileStatus = lfs.listStatus(new Path(localDir)); if (fileStatus != null) { while (fileStatus.hasNext()) { FileStatus status = fileStatus.next(); try { if (status.getPath().getName().matches(".*" + ContainerLocalizer.USERCACHE + "_DEL_.*")) { LOG.info("usercache path : " + status.getPath().toString()); cleanUpFilesPerUserDir(lfs, del, status.getPath()); } else if (status.getPath().getName() .matches(".*" + NM_PRIVATE_DIR + "_DEL_.*") || status.getPath().getName() .matches(".*" + ContainerLocalizer.FILECACHE + "_DEL_.*")) { del.delete(null, status.getPath(), new Path[] {}); } } catch (IOException ex) { // Do nothing, just give the warning LOG.warn("Failed to delete this local Directory: " + status.getPath().getName()); } } } }
/**
 * Schedule deletion of a user directory: each child is deleted as its owner
 * (with the parent-dir deletion as a dependent task); an empty directory is
 * deleted directly.
 */
private void cleanUpFilesPerUserDir(FileContext lfs, DeletionService del,
    Path userDirPath) throws IOException {
  RemoteIterator<FileStatus> children = lfs.listStatus(userDirPath);
  FileDeletionTask parentTask =
      del.createFileDeletionTask(null, userDirPath, new Path[] {});

  if (children == null || !children.hasNext()) {
    // Nothing inside: delete the directory itself right away.
    del.scheduleFileDeletionTask(parentTask);
    return;
  }

  List<FileDeletionTask> perOwnerTasks = new ArrayList<FileDeletionTask>();
  while (children.hasNext()) {
    FileStatus child = children.next();
    FileDeletionTask task = del.createFileDeletionTask(
        child.getOwner(), null, new Path[] { child.getPath() });
    // The parent dir can only go away after this child is gone.
    task.addFileDeletionTaskDependency(parentTask);
    perOwnerTasks.add(task);
  }
  for (FileDeletionTask task : perOwnerTasks) {
    del.scheduleFileDeletionTask(task);
  }
}
/**
 * Add files in the input path recursively into the results.
 *
 * @param result the List to store all files
 * @param fs the FileSystem
 * @param path the input path
 * @param inputFilter the input filter that can be used to filter files/dirs
 * @throws IOException on listing failure
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(path);
  while (children.hasNext()) {
    LocatedFileStatus child = children.next();
    if (!inputFilter.accept(child.getPath())) {
      continue;
    }
    if (child.isDirectory()) {
      // Descend into accepted subdirectories.
      addInputPathRecursively(result, fs, child.getPath(), inputFilter);
    } else {
      result.add(child);
    }
  }
}
/**
 * List one file status: a plain file is returned as-is; a directory's
 * accepted children are split into located statuses and (when recursive)
 * subdirectories needing further listing.
 */
@Override
public Result call() throws Exception {
  Result result = new Result();
  result.fs = fs;

  if (!fileStatus.isDirectory()) {
    // Plain file: report it directly.
    result.locatedFileStatuses.add(fileStatus);
    return result;
  }

  RemoteIterator<LocatedFileStatus> children =
      fs.listLocatedStatus(fileStatus.getPath());
  while (children.hasNext()) {
    LocatedFileStatus child = children.next();
    if (inputFilter.accept(child.getPath())) {
      if (recursive && child.isDirectory()) {
        result.dirsNeedingRecursiveCalls.add(child);
      } else {
        result.locatedFileStatuses.add(child);
      }
    }
  }
  return result;
}
/**
 * List the plain files in {@code path} accepted by {@code pathFilter}.
 * A missing directory is logged and yields an empty list.
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  path = fc.makeQualified(path);
  List<FileStatus> jhStatusList = new ArrayList<FileStatus>();
  try {
    RemoteIterator<FileStatus> entries = fc.listStatus(path);
    while (entries.hasNext()) {
      FileStatus entry = entries.next();
      // Keep only plain files that pass the filter; directories are skipped.
      if (entry.isFile() && pathFilter.accept(entry.getPath())) {
        jhStatusList.add(entry);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + path, fe);
  }
  return jhStatusList;
}
/**
 * CLI handler: print all encryption zones as a two-column table.
 * Returns 0 on success, 1 on unexpected arguments, 2 on I/O failure.
 */
@Override
public int run(Configuration conf, List<String> args) throws IOException {
  if (!args.isEmpty()) {
    System.err.println("Can't understand argument: " + args.get(0));
    return 1;
  }

  final DistributedFileSystem dfs = AdminHelper.getDFS(conf);
  try {
    final TableListing listing = new TableListing.Builder()
        .addField("").addField("", true)
        .wrapWidth(AdminHelper.MAX_LINE_WIDTH).hideHeaders().build();
    final RemoteIterator<EncryptionZone> zones = dfs.listEncryptionZones();
    while (zones.hasNext()) {
      EncryptionZone zone = zones.next();
      listing.addRow(zone.getPath(), zone.getKeyName());
    }
    System.out.println(listing.toString());
  } catch (IOException e) {
    System.err.println(prettifyException(e));
    return 2;
  }
  return 0;
}
/** Smoke-test listFiles on a relative path against a mini DFS cluster. */
@Test(timeout=60000)
public void testListFiles() throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    DistributedFileSystem fs = cluster.getFileSystem();

    final Path relative = new Path("relative");
    fs.create(new Path(relative, "foo")).close();

    final List<LocatedFileStatus> retVal = new ArrayList<LocatedFileStatus>();
    for (RemoteIterator<LocatedFileStatus> iter =
        fs.listFiles(relative, true); iter.hasNext();) {
      retVal.add(iter.next());
    }
    System.out.println("retVal = " + retVal);
  } finally {
    // Always tear the cluster down, even if the listing fails.
    cluster.shutdown();
  }
}
/** Test when input path is a file */
@Test
public void testFile() throws IOException {
  fc.mkdir(TEST_DIR, FsPermission.getDefault(), true);
  writeFile(fc, FILE1, FILE_LEN);

  // Listing a single file must behave identically whether or not the
  // recursive flag is set; the original duplicated this whole check inline.
  assertSingleFileListed(fc.util().listFiles(FILE1, true));
  assertSingleFileListed(fc.util().listFiles(FILE1, false));
}

/** Assert the iterator yields exactly FILE1's status and nothing else. */
private void assertSingleFileListed(RemoteIterator<LocatedFileStatus> itor)
    throws IOException {
  LocatedFileStatus stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);
}
/**
 * Poll the namenode until the cache directive appears, sleeping 1s between
 * attempts; returns false after CHECKTIMES unsuccessful checks.
 */
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  // The query filter never changes, so build it once up front.
  final CacheDirectiveInfo filter = new CacheDirectiveInfo.Builder()
      .setPool(directive.getPool())
      .setPath(directive.getPath())
      .build();
  for (int attempt = 0; attempt < CHECKTIMES; attempt++) {
    if (dfs.listCacheDirectives(filter).hasNext()) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
/**
 * Poll the namenode until a cache directive with the expected id and
 * replication shows up, sleeping 1s between attempts; false on timeout.
 */
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  // The query filter never changes, so build it once up front.
  final CacheDirectiveInfo filter = new CacheDirectiveInfo.Builder()
      .setPool(directive.getPool())
      .setPath(directive.getPath())
      .build();
  for (int attempt = 0; attempt < CHECKTIMES; attempt++) {
    RemoteIterator<CacheDirectiveEntry> entries =
        dfs.listCacheDirectives(filter);
    while (entries.hasNext()) {
      CacheDirectiveInfo info = entries.next().getInfo();
      if ((info.getId() == id)
          && (info.getReplication().shortValue() == newReplication)) {
        return true;
      }
    }
    Thread.sleep(1000);
  }
  return false;
}
/**
 * Poll the namenode until the cache directive disappears, sleeping 1s
 * between attempts; returns false after CHECKTIMES unsuccessful checks.
 */
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  // The query filter never changes, so build it once up front.
  final CacheDirectiveInfo filter = new CacheDirectiveInfo.Builder()
      .setPool(directive.getPool())
      .setPath(directive.getPath())
      .build();
  for (int attempt = 0; attempt < CHECKTIMES; attempt++) {
    if (!dfs.listCacheDirectives(filter).hasNext()) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
/**
 * Iterate all cache pools, checking each returned pool name against the
 * expected set while forcing an HA failover every other iteration to prove
 * the listing survives namenode transitions.
 */
@SuppressWarnings("unchecked")
private void listCachePools(
    HashSet<String> poolNames, int active) throws Exception {
  HashSet<String> remaining = (HashSet<String>) poolNames.clone();
  RemoteIterator<CachePoolEntry> pools = dfs.listCachePools();
  int poolCount = poolNames.size();
  for (int i = 0; i < poolCount; i++) {
    String poolName = pools.next().getInfo().getPoolName();
    assertTrue("The pool name should be expected",
        remaining.remove(poolName));
    // Fail over between the two namenodes on every other iteration.
    if (i % 2 == 0) {
      int standby = active;
      active = (standby == 0) ? 1 : 0;
      cluster.transitionToStandby(standby);
      cluster.transitionToActive(active);
      cluster.waitActive(active);
    }
  }
  assertTrue("All pools must be found", remaining.isEmpty());
}
/**
 * Iterate all cache directives, checking each directive's pool name against
 * the expected set while forcing an HA failover every other iteration to
 * prove the listing survives namenode transitions.
 */
@SuppressWarnings("unchecked")
private void listCacheDirectives(
    HashSet<String> poolNames, int active) throws Exception {
  HashSet<String> remaining = (HashSet<String>) poolNames.clone();
  RemoteIterator<CacheDirectiveEntry> directives =
      dfs.listCacheDirectives(null);
  int poolCount = poolNames.size();
  for (int i = 0; i < poolCount; i++) {
    String poolName = directives.next().getInfo().getPool();
    assertTrue("The pool name should be expected",
        remaining.remove(poolName));
    // Fail over between the two namenodes on every other iteration.
    if (i % 2 == 0) {
      int standby = active;
      active = (standby == 0) ? 1 : 0;
      cluster.transitionToStandby(standby);
      cluster.transitionToActive(active);
      cluster.waitActive(active);
    }
  }
  assertTrue("All pools must be found", remaining.isEmpty());
}
private static void checkEquals(RemoteIterator<LocatedFileStatus> i1, RemoteIterator<LocatedFileStatus> i2) throws IOException { while (i1.hasNext()) { assertTrue(i2.hasNext()); // Compare all the fields but the path name, which is relative // to the original path from listFiles. LocatedFileStatus l1 = i1.next(); LocatedFileStatus l2 = i2.next(); assertEquals(l1.getAccessTime(), l2.getAccessTime()); assertEquals(l1.getBlockSize(), l2.getBlockSize()); assertEquals(l1.getGroup(), l2.getGroup()); assertEquals(l1.getLen(), l2.getLen()); assertEquals(l1.getModificationTime(), l2.getModificationTime()); assertEquals(l1.getOwner(), l2.getOwner()); assertEquals(l1.getPermission(), l2.getPermission()); assertEquals(l1.getReplication(), l2.getReplication()); } assertFalse(i2.hasNext()); }
/** Test the FileStatus obtained calling listStatus on a file */
@Test
public void testListStatusOnFile() throws IOException {
  FileStatus[] stats = fs.listStatus(file1);
  assertEquals(1, stats.length);

  FileStatus status = stats[0];
  assertFalse(file1 + " should be a file", status.isDirectory());
  assertEquals(blockSize, status.getBlockSize());
  assertEquals(1, status.getReplication());
  assertEquals(fileSize, status.getLen());
  assertEquals(
      file1.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString(),
      status.getPath().toString());

  // The FileContext view of the same file must agree with the FileSystem one.
  RemoteIterator<FileStatus> itor = fc.listStatus(file1);
  FileStatus fcStatus = itor.next();
  assertEquals(stats[0], fcStatus);
  assertFalse(file1 + " should be a file", fcStatus.isDirectory());
}
/**
 * Checks that an encryption zone with the specified keyName and path (if not
 * null) is present.
 *
 * @throws IOException if a matching zone could not be found
 */
public void assertZonePresent(String keyName, String path) throws IOException {
  final RemoteIterator<EncryptionZone> it = dfsAdmin.listEncryptionZones();
  boolean match = false;
  while (!match && it.hasNext()) {
    EncryptionZone zone = it.next();
    // A null keyName or path acts as a wildcard and matches any zone.
    boolean matchKey = keyName == null || zone.getKeyName().equals(keyName);
    boolean matchPath = path == null || zone.getPath().equals(path);
    match = matchKey && matchPath;
  }
  assertTrue("Did not find expected encryption zone with keyName " + keyName +
      " path " + path, match
  );
}
/**
 * To get this project to compile under Hadoop 1, this code needs to be
 * commented out
 *
 *
 * @param fs filesystem
 * @param dir dir
 * @param subdir subdir
 * @param recursive recurse?
 * @throws IOException IO problems
 */
public static void assertListFilesFinds(FileSystem fs,
    Path dir,
    Path subdir,
    boolean recursive) throws IOException {
  RemoteIterator<LocatedFileStatus> listing = fs.listFiles(dir, recursive);
  boolean found = false;
  int entries = 0;
  // Accumulate the full listing so the failure message shows everything seen.
  StringBuilder listingText = new StringBuilder();
  while (listing.hasNext()) {
    LocatedFileStatus status = listing.next();
    entries++;
    listingText.append(status.toString()).append('\n');
    if (status.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir + " not found in directory " + dir + " : "
      + " entries=" + entries + " content" + listingText.toString(), found);
}
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir) throws IOException { FileSystem fs = inputDir.getFileSystem(conf); // obtain input data file statuses long dataSize = 0; long fileCount = 0; RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true); PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter(); while (iter.hasNext()) { LocatedFileStatus lStatus = iter.next(); if (filter.accept(lStatus.getPath())) { dataSize += lStatus.getLen(); ++fileCount; } } // publish the plain data statistics LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize)); LOG.info("Total number of input data files : " + fileCount); return new DataStatistics(dataSize, fileCount, false); }
/**
 * Round-trip a payload larger than the remote write buffer through the
 * client file system and verify the bytes read back are identical.
 */
@Test
public void basicClientReadWrite() throws Exception {
  Path basePath = new Path(temporaryFolder.newFolder().getAbsolutePath());
  Path path = ((PathCanonicalizer) clientFS)
      .canonicalizePath(new Path(basePath, "testfile.bytes"));

  // Payload deliberately spans several flushes of the remote write buffer.
  final byte[] payload =
      new byte[RemoteNodeFileSystem.REMOTE_WRITE_BUFFER_SIZE * 3];
  new Random().nextBytes(payload);

  try (FSDataOutputStream stream = clientFS.create(path, false)) {
    stream.write(payload);
  }

  RemoteIterator<LocatedFileStatus> iter =
      client.fileSystem.listFiles(basePath, false);
  assertEquals(true, iter.hasNext());
  LocatedFileStatus status = iter.next();

  try (FSDataInputStream in = clientFS.open(status.getPath())) {
    byte[] readBack = new byte[payload.length];
    int dataRead = in.read(readBack);
    assertEquals(readBack.length, dataRead);
    assertTrue(Arrays.equals(payload, readBack));
  }

  client.fileSystem.delete(status.getPath(), false);
}
static SortedSet<byte []> readKeysToSearch(final Configuration conf) throws IOException, InterruptedException { Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY)); FileSystem fs = FileSystem.get(conf); SortedSet<byte []> result = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR); if (!fs.exists(keysInputDir)) { throw new FileNotFoundException(keysInputDir.toString()); } if (!fs.isDirectory(keysInputDir)) { throw new UnsupportedOperationException("TODO"); } else { RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false); while(iterator.hasNext()) { LocatedFileStatus keyFileStatus = iterator.next(); // Skip "_SUCCESS" file. if (keyFileStatus.getPath().getName().startsWith("_")) continue; result.addAll(readFileToSearch(conf, fs, keyFileStatus)); } } return result; }
void enumerateDir() throws Exception { System.out.println("enumarate dir, path " + path); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); int repfactor = 4; for (int k = 0; k < repfactor; k++) { long start = System.currentTimeMillis(); for (int i = 0; i < size; i++) { // single operation == loop RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false); while (iter.hasNext()) { iter.next(); } } long end = System.currentTimeMillis(); double executionTime = ((double) (end - start)); double latency = executionTime * 1000.0 / ((double) size); System.out.println("execution time [ms] " + executionTime); System.out.println("latency [us] " + latency); } fs.close(); }
/**
 * Verify basic file-system operations: clean the root, confirm it lists as
 * empty, create a directory with mode 700, and confirm it appears in the
 * root listing.
 */
@Test
public void test() throws IOException {
  Configuration configuration = HadoopClientHelper.createConfiguration();
  FileSystem fileSystem = FileSystem.newInstance(configuration);
  assertNotNull(fileSystem);

  // Start from an empty root: remove leftovers from earlier runs.
  if (fileSystem.exists(new Path("/test"))) {
    assertTrue(fileSystem.delete(new Path("/test"), true));
  }
  if (fileSystem.exists(new Path("/tmp"))) {
    assertTrue(fileSystem.delete(new Path("/tmp"), true));
  }

  RemoteIterator<LocatedFileStatus> files =
      fileSystem.listLocatedStatus(new Path("/"));
  assertFalse(files.hasNext());

  assertTrue(fileSystem.mkdirs(new Path("/test"), new FsPermission("700")));
  files = fileSystem.listLocatedStatus(new Path("/"));
  assertTrue(files.hasNext());
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    System.out.println(file.getPath().getName());
  }
}