/**
 * Fetches the checksum of a file from the remote endpoint and parses the
 * XML response via SAX (this object is its own content handler).
 *
 * @param f file path, servlet-encoded before being appended to the URL
 * @return the checksum captured by the SAX callbacks during parsing
 * @throws IOException if the connection fails, the response is not valid
 *         XML, or the parse failure wraps an underlying {@link IOException}
 */
private FileChecksum getFileChecksum(String f) throws IOException {
  final HttpURLConnection connection = openConnection(
      "/fileChecksum" + ServletUtil.encodePath(f),
      "ugi=" + getEncodedUgiParameter());
  try {
    final XMLReader xr = XMLReaderFactory.createXMLReader();
    // Harden against XXE: the XML body arrives over the network, so
    // forbid DOCTYPEs and external entity resolution outright.
    xr.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    xr.setFeature("http://xml.org/sax/features/external-general-entities", false);
    xr.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    xr.setContentHandler(this);
    xr.parse(new InputSource(connection.getInputStream()));
  } catch (SAXException e) {
    // Unwrap an embedded IOException so callers see the real I/O failure.
    // instanceof is null-safe, so no separate null check is needed.
    final Exception embedded = e.getException();
    if (embedded instanceof IOException) {
      throw (IOException) embedded;
    }
    throw new IOException("invalid xml directory content", e);
  } finally {
    connection.disconnect();
  }
  // Populated by the SAX content-handler callbacks while parsing.
  return filechecksum;
}
/**
 * Prints "path&lt;TAB&gt;algorithm&lt;TAB&gt;hex-checksum" for a single file,
 * or "path&lt;TAB&gt;NONE&lt;TAB&gt;" when the filesystem provides no checksum.
 *
 * @param item the path to report on; must not be a directory
 * @throws IOException if the checksum lookup fails
 * @throws PathIsDirectoryException if {@code item} is a directory
 */
@Override
protected void processPath(PathData item) throws IOException {
  if (item.stat.isDirectory()) {
    throw new PathIsDirectoryException(item.toString());
  }
  final FileChecksum checksum = item.fs.getFileChecksum(item.path);
  if (checksum != null) {
    final String hex = StringUtils.byteToHexString(
        checksum.getBytes(), 0, checksum.getLength());
    out.printf("%s\t%s\t%s%n",
        item.toString(), checksum.getAlgorithmName(), hex);
  } else {
    out.printf("%s\tNONE\t%n", item.toString());
  }
}
/**
 * Builds metadata for a path: its URI string, modification time, optional
 * checksum details, and an immutable snapshot of its children's metadata.
 *
 * @param location the path being described; stored as its URI string
 * @param lastModifiedTimestamp modification time of the path
 * @param checksum file checksum, or {@code null} when unavailable
 *        (e.g. directories) — checksum fields are then null/zero
 * @param childrenMetadata child entries; {@code null} is treated as empty
 */
public PathMetadata(
    Path location,
    long lastModifiedTimestamp,
    FileChecksum checksum,
    List<PathMetadata> childrenMetadata) {
  this.location = location.toUri().toString();
  this.lastModifiedTimestamp = lastModifiedTimestamp;
  if (checksum != null) {
    checkSumAlgorithmName = checksum.getAlgorithmName();
    checkSumLength = checksum.getLength();
    this.checksum = checksum.getBytes();
  } else {
    checkSumAlgorithmName = null;
    checkSumLength = 0;
    this.checksum = null;
  }
  // Defensive immutable copy so callers cannot mutate our child list.
  this.childrenMetadata = childrenMetadata == null
      ? ImmutableList.<PathMetadata>of()
      : ImmutableList.copyOf(childrenMetadata);
}
/**
 * Recursively builds a {@link PathMetadata} tree rooted at {@code location}:
 * files get their checksum recorded, directories recurse into each child.
 *
 * @param location path to describe; resolved via the configured FileSystem
 * @return metadata for the path, including metadata of all descendants
 * @throws CircusTrainException wrapping any {@link IOException} from the
 *         filesystem calls
 */
@Override
public PathMetadata apply(@Nonnull Path location) {
  try {
    final FileSystem fs = location.getFileSystem(conf);
    final FileStatus status = fs.getFileStatus(location);
    // Only regular files carry a checksum; directories get null.
    final FileChecksum checksum = status.isFile()
        ? fs.getFileChecksum(location)
        : null;
    final List<PathMetadata> children = new ArrayList<>();
    if (status.isDirectory()) {
      for (FileStatus child : fs.listStatus(location)) {
        children.add(apply(child.getPath()));
      }
    }
    return new PathMetadata(location, status.getModificationTime(), checksum, children);
  } catch (IOException e) {
    throw new CircusTrainException("Unable to compute digest for location " + location.toString(), e);
  }
}
/**
 * Verifies that the checksum reported through the HttpFS gateway matches
 * the checksum computed directly by HDFS for the same one-byte file.
 * Skipped for the local filesystem, which has no proxied HDFS backend.
 */
private void testChecksum() throws Exception {
  if (!isLocalFS()) {
    FileSystem fs = FileSystem.get(getProxiedFSConf());
    fs.mkdirs(getProxiedFSTestDir());
    Path path = new Path(getProxiedFSTestDir(), "foo.txt");
    OutputStream os = fs.create(path);
    os.write(1);
    os.close();
    FileChecksum hdfsChecksum = fs.getFileChecksum(path);
    fs.close();
    fs = getHttpFSFileSystem();
    FileChecksum httpChecksum = fs.getFileChecksum(path);
    fs.close();
    // JUnit's assertEquals takes (expected, actual): the direct HDFS
    // checksum is the reference value, the HttpFS one is under test.
    // The original call had the arguments reversed, which produced
    // misleading failure messages.
    Assert.assertEquals(hdfsChecksum.getAlgorithmName(),
        httpChecksum.getAlgorithmName());
    Assert.assertEquals(hdfsChecksum.getLength(), httpChecksum.getLength());
    Assert.assertArrayEquals(hdfsChecksum.getBytes(), httpChecksum.getBytes());
  }
}
@Test public void testGetFileChecksum() throws IOException, URISyntaxException { // Create two different files in HDFS fileSystemTestHelper.createFile(fHdfs, someFile); fileSystemTestHelper.createFile(fHdfs, fileSystemTestHelper .getTestRootPath(fHdfs, someFile + "other"), 1, 512); // Get checksum through ViewFS FileChecksum viewFSCheckSum = vfs.getFileChecksum( new Path("/vfstmp/someFileForTestGetFileChecksum")); // Get checksum through HDFS. FileChecksum hdfsCheckSum = fHdfs.getFileChecksum( new Path(someFile)); // Get checksum of different file in HDFS FileChecksum otherHdfsFileCheckSum = fHdfs.getFileChecksum( new Path(someFile+"other")); // Checksums of the same file (got through HDFS and ViewFS should be same) assertEquals("HDFS and ViewFS checksums were not the same", viewFSCheckSum, hdfsCheckSum); // Checksum of different files should be different. assertFalse("Some other HDFS file which should not have had the same " + "checksum as viewFS did!", viewFSCheckSum.equals(otherHdfsFileCheckSum)); }
/**
 * Creates a file, appends to it a fixed number of rounds, records the full
 * checksum after each round, and then verifies that the length-limited
 * checksum of the final file at each prefix length matches the checksum
 * recorded when the file actually had that length.
 *
 * @param foo path of the file to create and append to
 * @param appendLength bytes written per round (and in the initial create)
 */
public void testGetFileChecksum(final Path foo, final int appendLength)
    throws Exception {
  final int appendRounds = 16;
  FileChecksum[] fc = new FileChecksum[appendRounds + 1];
  DFSTestUtil.createFile(dfs, foo, appendLength, REPLICATION, 0L);
  fc[0] = dfs.getFileChecksum(foo);
  for (int i = 0; i < appendRounds; i++) {
    DFSTestUtil.appendFile(dfs, foo, appendLength);
    fc[i + 1] = dfs.getFileChecksum(foo);
  }

  for (int i = 0; i < appendRounds + 1; i++) {
    // Checksum of the first (i+1)*appendLength bytes of the final file.
    FileChecksum checksum = dfs.getFileChecksum(foo, appendLength * (i + 1));
    // assertEquals instead of assertTrue(equals(...)) so a failure reports
    // both checksum values instead of a bare "expected true".
    Assert.assertEquals(fc[i], checksum);
  }
}
/**
 * Verifies that source and target checksums agree, throwing an
 * {@link IOException} with a diagnostic message when they do not. The
 * message additionally suggests -pb when the two files' block sizes differ,
 * since differing block sizes change HDFS checksums.
 *
 * @throws IOException if the checksums do not match or a status call fails
 */
private void compareCheckSums(FileSystem sourceFS, Path source,
    FileChecksum sourceChecksum, FileSystem targetFS, Path target)
    throws IOException {
  if (DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
      targetFS, target)) {
    return;
  }
  StringBuilder errorMessage =
      new StringBuilder("Check-sum mismatch between ")
          .append(source).append(" and ").append(target).append(".");
  boolean blockSizesDiffer = sourceFS.getFileStatus(source).getBlockSize()
      != targetFS.getFileStatus(target).getBlockSize();
  if (blockSizesDiffer) {
    errorMessage
        .append(" Source and target differ in block-size.")
        .append(" Use -pb to preserve block-sizes during copy.")
        .append(" Alternatively, skip checksum-checks altogether, using -skipCrc.")
        .append(" (NOTE: By skipping checksums, one runs the risk of masking data-corruption during file-transfer.)");
  }
  throw new IOException(errorMessage.toString());
}
/** * Check if the two files are equal by looking at the file length, * and at the checksum (if user has specified the verifyChecksum flag). */ private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) { // Not matching length if (inputStat.getLen() != outputStat.getLen()) return false; // Mark files as equals, since user asked for no checksum verification if (!verifyChecksum) return true; // If checksums are not available, files are not the same. FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath()); if (inChecksum == null) return false; FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath()); if (outChecksum == null) return false; return inChecksum.equals(outChecksum); }
/**
 * Streams the source file's bytes into the target file. In OVERWRITE mode
 * the target is (re)created with replication, block size, and checksum
 * options derived from the preserved attributes; otherwise bytes are
 * appended to the existing target.
 *
 * @return the number of bytes copied, as reported by copyBytes
 * @throws IOException if the create/append or the byte transfer fails
 */
private long copyToFile(Path targetPath, FileSystem targetFS,
    FileStatus sourceFileStatus, long sourceOffset, Mapper.Context context,
    EnumSet<FileAttribute> fileAttributes, final FileChecksum sourceChecksum)
    throws IOException {
  final FsPermission permission = FsPermission.getFileDefault()
      .applyUMask(FsPermission.getUMask(targetFS.getConf()));
  final OutputStream outStream;
  if (action == FileAction.OVERWRITE) {
    // Derive creation parameters from the attributes to be preserved.
    final short replication = getReplicationFactor(fileAttributes,
        sourceFileStatus, targetFS, targetPath);
    final long blockSize = getBlockSize(fileAttributes,
        sourceFileStatus, targetFS, targetPath);
    final FSDataOutputStream created = targetFS.create(targetPath, permission,
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), BUFFER_SIZE,
        replication, blockSize, context,
        getChecksumOpt(fileAttributes, sourceChecksum));
    outStream = new BufferedOutputStream(created);
  } else {
    outStream =
        new BufferedOutputStream(targetFS.append(targetPath, BUFFER_SIZE));
  }
  return copyBytes(sourceFileStatus, sourceOffset, outStream, BUFFER_SIZE,
      context);
}
/**
 * Get the checksum of a file.
 *
 * @param file The file path
 * @return The file checksum. The default return value is null,
 *         which indicates that no checksum algorithm is implemented
 *         in the corresponding FileSystem.
 * @throws IOException if the path is a directory, the object is missing,
 *         or the HEAD request fails
 */
@Override
public FileChecksum getFileChecksum(final Path file) throws IOException {
  final String mantaPath = mantaPath(file);
  try {
    final MantaObject head = client.head(mantaPath);
    if (head.isDirectory()) {
      throw new IOException("Can't get checksum of directory");
    }
    // Manta exposes the object's MD5 digest in its HEAD metadata.
    return new MantaChecksum(head.getMd5Bytes());
  } catch (MantaClientHttpResponseException e) {
    // Translate a 404 into the conventional FileNotFoundException.
    if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND) {
      throw new FileNotFoundException(mantaPath);
    }
    throw e;
  }
}
/** * Returns true if the both files have checksums and they match. Returns false if checksums exist * but they do not match. Returns empty if either file does not have a checksum. * * @param conf configuration use to create the FileSystems * @param srcFile source file * @param destFile destination file * @throws IOException if there is an error getting the checksum for the specified files */ public static Optional<Boolean> checksumsMatch(Configuration conf, Path srcFile, Path destFile) throws IOException { FileSystem srcFs = srcFile.getFileSystem(conf); FileChecksum srcChecksum = srcFs.getFileChecksum(srcFile); FileSystem destFs = destFile.getFileSystem(conf); FileChecksum destChecksum = destFs.getFileChecksum(destFile); if (srcChecksum == null || destChecksum == null) { // If either filesystem does not support checksums return Optional.empty(); } else { return Optional.of(Boolean.valueOf(srcChecksum.equals(destChecksum))); } }
/**
 * Prints "path&lt;TAB&gt;algorithm&lt;TAB&gt;hex-checksum" for a single file,
 * or "path&lt;TAB&gt;NONE&lt;TAB&gt;" when the filesystem provides no checksum.
 *
 * @param item the path to report on; must not be a directory
 * @throws IOException if the checksum lookup fails
 * @throws PathIsDirectoryException if {@code item} is a directory
 */
@Override
protected void processPath(PathData item) throws IOException {
  if (item.stat.isDirectory()) {
    throw new PathIsDirectoryException(item.toString());
  }
  FileChecksum checksum = item.fs.getFileChecksum(item.path);
  if (checksum == null) {
    // %n (platform line separator) instead of a literal \n — this is the
    // printf convention and matches the sibling implementation of this
    // command's processPath.
    out.printf("%s\tNONE\t%n", item.toString());
  } else {
    String checksumString = StringUtils.byteToHexString(
        checksum.getBytes(), 0, checksum.getLength());
    out.printf("%s\t%s\t%s%n", item.toString(),
        checksum.getAlgorithmName(), checksumString);
  }
}
/**
 * Fetches the checksum of a file from the remote endpoint and parses the
 * XML response via SAX (this object is its own content handler).
 *
 * @param f file path appended verbatim to the request URL
 * @return the checksum captured by the SAX callbacks during parsing
 * @throws IOException if the connection fails, the response is not valid
 *         XML, or the parse failure wraps an underlying {@link IOException}
 */
private FileChecksum getFileChecksum(String f) throws IOException {
  // NOTE(review): f is appended without servlet-path encoding, unlike the
  // newer variant of this method — paths with special characters will
  // produce a malformed URL. TODO confirm and encode if callers allow it.
  final HttpURLConnection connection = openConnection(
      "/fileChecksum" + f, "ugi=" + ugi);
  try {
    final XMLReader xr = XMLReaderFactory.createXMLReader();
    // Harden against XXE: the XML body arrives over the network, so
    // forbid DOCTYPEs and external entity resolution outright.
    xr.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    xr.setFeature("http://xml.org/sax/features/external-general-entities", false);
    xr.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    xr.setContentHandler(this);
    connection.setRequestMethod("GET");
    connection.connect();
    xr.parse(new InputSource(connection.getInputStream()));
  } catch (SAXException e) {
    // Unwrap an embedded IOException so callers see the real I/O failure.
    // instanceof is null-safe, so no separate null check is needed.
    final Exception embedded = e.getException();
    if (embedded instanceof IOException) {
      throw (IOException) embedded;
    }
    throw new IOException("invalid xml directory content", e);
  } finally {
    connection.disconnect();
  }
  // Populated by the SAX content-handler callbacks while parsing.
  return filechecksum;
}