/**
 * Test double for listing objects.
 *
 * <p>Throws a 404 {@link AmazonServiceException} for the bucket named
 * {@code nonExistingBucket} while {@code nonExistingBucketCreated} is false. Otherwise
 * returns a listing holding one summary (bucket name and key only) per entry of
 * {@code objects}, capped at the request's max-keys value or, when none is set,
 * at {@code maxCapacity}.
 *
 * @param listObjectsRequest the request carrying the bucket name and optional max keys
 * @return a listing built from the leading entries of {@code objects}
 */
@Override
public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) throws AmazonClientException, AmazonServiceException {
    boolean bucketMissing = "nonExistingBucket".equals(listObjectsRequest.getBucketName()) && !nonExistingBucketCreated;
    if (bucketMissing) {
        AmazonServiceException notFound = new AmazonServiceException("Unknown bucket");
        notFound.setStatusCode(404);
        throw notFound;
    }

    // Use the caller-supplied limit when present, otherwise the configured default.
    int limit;
    if (ObjectHelper.isEmpty(listObjectsRequest.getMaxKeys()) || listObjectsRequest.getMaxKeys() == null) {
        limit = maxCapacity;
    } else {
        limit = listObjectsRequest.getMaxKeys();
    }

    ObjectListing result = new ObjectListing();
    int position = 0;
    while (position < objects.size() && position < limit) {
        S3ObjectSummary entry = new S3ObjectSummary();
        entry.setBucketName(objects.get(position).getBucketName());
        entry.setKey(objects.get(position).getKey());
        result.getObjectSummaries().add(entry);
        position++;
    }
    return result;
}
private boolean isUploadFile(Iterator<S3ObjectSummary> iter, String path, String hash) { while (iter.hasNext()) { S3ObjectSummary fileS3 = iter.next(); // Filename should look like this: // a/b if (!fileS3.getKey().equals(path)) { // If this is another file, then continue! continue; } // Remove the file from the S3 list as it does not need to be processed further iter.remove(); // Upload if the hashes differ return StringUtils.isNullOrEmpty(hash) || !fileS3.getETag().equals(hash); } return true; }
/**
 * Streams the configurations stored under {@code prefix} in {@code bucket}.
 *
 * <p>Objects whose key yields no version from {@code getVersionIfMatch} are skipped; the
 * rest are ordered by version, highest first, and wrapped as {@code S3WritableConfiguration}.
 *
 * @return configurations in descending version order
 */
@Override
public Stream<WriteableConfiguration> getCachedConfigurations() throws IOException {
    Comparator<Pair<Integer, S3ObjectSummary>> highestVersionFirst =
            Comparator.comparing((Pair<Integer, S3ObjectSummary> entry) -> entry.getFirst()).reversed();

    Stream<S3ObjectSummary> summaries =
            StreamSupport.stream(S3Objects.withPrefix(s3, bucket, prefix).spliterator(), false);

    Stream<Pair<Integer, S3ObjectSummary>> versioned = summaries.map(summary -> {
        Integer version = getVersionIfMatch(summary.getKey());
        if (version != null) {
            return new Pair<Integer, S3ObjectSummary>(version, summary);
        }
        // Key does not look like a versioned configuration; dropped by the filter below.
        return null;
    });

    return versioned
            .filter(Objects::nonNull)
            .sorted(highestVersionFirst)
            .map(entry -> new S3WritableConfiguration(s3, entry.getSecond(), Integer.toString(entry.getFirst())));
}
/**
 * Writes the key of every object under {@code path} to {@code out}, one per line,
 * descending recursively into each common prefix ("sub-directory").
 *
 * <p>Listings are requested with delimiter {@code "/"} in pages of up to 1000 entries.
 * The marker returned by each page is passed to the next request so that truncated
 * listings advance; previously the marker was read but never sent, so a truncated
 * listing re-fetched the first page forever.
 *
 * @param path key prefix to list
 * @param out  stream that receives the newline-terminated keys
 * @throws Exception if listing or writing fails
 */
@Override
public void getFileList(String path, OutputStream out) throws Exception {
    String marker = null;
    do {
        // Resume after the last entry of the previous page (null on the first request).
        ListObjectsRequest request = new ListObjectsRequest(bucketName, path, marker, "/", 1000);
        ObjectListing listing = client.listObjects(request);
        for (S3ObjectSummary object : listing.getObjectSummaries()) {
            String line = object.getKey() + "\n";
            out.write(line.getBytes());
        }
        for (String commonPrefix : listing.getCommonPrefixes()) {
            getFileList(commonPrefix, out);
        }
        marker = listing.getNextMarker();
    } while (marker != null);
}
/**
 * Posts one SQS message per object in the batch that passes {@code shouldEnqueue}.
 *
 * <p>The message body is {@code bucket/key}. {@code numSeen} is incremented once per
 * posted object. The limit check only reads the counter: the previous check used
 * {@code incrementAndGet()}, which added one phantom object per batch.
 *
 * @param objects the batch of summaries to scan
 * @return {@code false} once {@code max} (when greater than -1) objects have been posted,
 *         {@code true} if scanning should continue
 */
private boolean processObjects(List<S3ObjectSummary> objects) {
    Logger.Debug("Scanning next batch of %s ", objects.size());
    objects
        .parallelStream()
        .filter(this::shouldEnqueue)
        .forEach(object -> {
            numSeen.incrementAndGet();
            String path = object.getBucketName() + "/" + object.getKey();
            Logger.Info("Posting: %s", path);
            SendMessageRequest msg = new SendMessageRequest()
                .withQueueUrl(queueUrl)
                .withMessageBody(path);
            sqs.sendMessage(msg);
        });
    // Read-only check; ">=" keeps the threshold the old "incrementAndGet() > max" had on a first check.
    if (max > -1L && numSeen.get() >= max) {
        Logger.Info("Added max jobs, quitting");
        return false;
    }
    return true;
}
/**
 * Lists every object in {@code bucketName} whose key starts with {@code prefix}.
 *
 * <p>Follows truncated listings by feeding each page's next marker into the following
 * request. Each object is also printed to standard output.
 *
 * @param prefix key prefix to filter by
 * @return one {@code AwsFileMiniModel} (key and last-modified date) per object found
 */
@Override
@SuppressWarnings("unused")
public List<AwsFileMiniModel> list(String prefix) {
    AmazonS3 s3client = new AmazonS3Client(new ProfileCredentialsProvider());
    // Diamond instead of the raw ArrayList type, so the list is properly typed.
    List<AwsFileMiniModel> files = new ArrayList<>();
    ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix);
    ObjectListing objectListing;
    do {
        objectListing = s3client.listObjects(listObjectsRequest);
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            System.out.println(" - " + objectSummary.getKey() + " " + "(size = " + objectSummary.getSize() + ")" + " (date = "
                + objectSummary.getLastModified() + ")");
            files.add(new AwsFileMiniModel(objectSummary.getKey(), objectSummary.getLastModified()));
        }
        listObjectsRequest.setMarker(objectListing.getNextMarker());
    } while (objectListing.isTruncated());
    return files;
}
/** * Test cleanup deletes everything in the bucket so obviously only use on Pairtree test buckets. * * @param aContext A test context */ @Override @After public void tearDown(final TestContext aContext) { super.tearDown(aContext); // Clean up our test resources in the S3 bucket final ObjectListing listing = myS3Client.listObjects(myTestBucket); final Iterator<S3ObjectSummary> iterator = listing.getObjectSummaries().iterator(); while (iterator.hasNext()) { try { myS3Client.deleteObject(myTestBucket, iterator.next().getKey()); } catch (final AmazonClientException details) { aContext.fail(details); } } }
/**
 * Deletes every object under {@code prefix} in {@code bucketName}, issuing bulk delete
 * requests of at most {@code BULK_DELETE_SIZE} keys each.
 *
 * @param s3Client   client used for listing and deleting
 * @param bucketName bucket to clean
 * @param prefix     key prefix selecting the objects to delete
 */
static void deletePrefix(AmazonS3 s3Client, String bucketName, String prefix) {
    DeleteObjectsRequest bulkRequest = new DeleteObjectsRequest(bucketName);
    List<DeleteObjectsRequest.KeyVersion> pending = new ArrayList<>();

    for (S3ObjectSummary found : S3Objects.withPrefix(s3Client, bucketName, prefix)) {
        pending.add(new DeleteObjectsRequest.KeyVersion(found.getKey()));
        if (pending.size() != BULK_DELETE_SIZE) {
            continue;
        }
        // Batch is full: send it and start collecting the next one.
        bulkRequest.setKeys(pending);
        s3Client.deleteObjects(bulkRequest);
        pending.clear();
    }

    if (pending.isEmpty()) {
        return;
    }
    // Send the final, partially filled batch.
    bulkRequest.setKeys(pending);
    s3Client.deleteObjects(bulkRequest);
}
/**
 * Copies every object below the source trace's child prefix to the same relative key
 * below the target trace's child prefix, within {@code bucketName}.
 *
 * <p>A listed key that does not start with the source prefix is logged as an error and skipped.
 */
@Override
protected void copy(RuntimeExecutionTrace source, RuntimeExecutionTrace target,
        RuntimeAnnotatedExecutionTrace absoluteSource, RuntimeAnnotatedExecutionTrace absoluteTarget)
        throws IOException {
    String sourcePrefix = toS3Path(source).getPrefixForChildren();
    String targetPrefix = toS3Path(target).getPrefixForChildren();

    for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucketName, sourcePrefix)) {
        if (!summary.getKey().startsWith(sourcePrefix)) {
            log.error(String.format(
                "S3Objects.withPrefix() returned unexpected key '%s' when asked for prefix '%s'.",
                summary.getKey(), sourcePrefix
            ));
            continue;
        }
        // Re-root the key: strip the source prefix and prepend the target prefix.
        String suffix = summary.getKey().substring(sourcePrefix.length());
        String destinationKey = targetPrefix + suffix;
        s3Client.copyObject(bucketName, summary.getKey(), bucketName, destinationKey);
    }
}
/**
 * Removes the local test file and, when an S3 account is configured, deletes every
 * object in the account's hashed-name bucket.
 *
 * <p>All listing pages are walked; previously only the first page was deleted. The
 * hashed bucket name is computed once and used for both listing and deletion.
 */
@Override
protected void tearDown() throws Exception {
    final File theFile = new File(TEST_FILE_NAME);
    theFile.delete();

    if (s3Account != null) {
        final AmazonS3Client s3 = new AmazonS3Client(
                new BasicAWSCredentials(s3Account.keyId, s3Account.secretKey));
        final String hash = s3Account.getHashedName();

        if (s3.doesBucketExist(hash)) {
            // Fully qualified so this block does not depend on an extra import.
            com.amazonaws.services.s3.model.ObjectListing listing = s3.listObjects(hash);
            while (true) {
                for (S3ObjectSummary summary : listing.getObjectSummaries()) {
                    s3.deleteObject(hash, summary.getKey());
                }
                if (!listing.isTruncated()) {
                    break;
                }
                listing = s3.listNextBatchOfObjects(listing);
            }
        }
    }

    super.tearDown();
}
/** * Gets the metadata for a single table in this stash. This is similar to getting the splits for the table * except that it exposes lower level information about the underlying S3 files. For clients who will use * their own system for reading the files from S3, such as source files for a map-reduce job, this method provides * the necessary information. For simply iterating over the stash contents using either {@link #scan(String)} * or {@link #getSplits(String)} in conjunction with {@link #getSplit(StashSplit)} is preferred. */ public StashTableMetadata getTableMetadata(String table) throws StashNotAvailableException, TableNotStashedException { ImmutableList.Builder<StashFileMetadata> filesBuilder = ImmutableList.builder(); Iterator<S3ObjectSummary> objectSummaries = getS3ObjectSummariesForTable(table); while (objectSummaries.hasNext()) { S3ObjectSummary objectSummary = objectSummaries.next(); filesBuilder.add(new StashFileMetadata(_bucket, objectSummary.getKey(), objectSummary.getSize())); } List<StashFileMetadata> files = filesBuilder.build(); // Get the prefix arbitrarily from the first file. String prefix = files.get(0).getKey(); prefix = prefix.substring(0, prefix.lastIndexOf('/') + 1); return new StashTableMetadata(_bucket, prefix, table, files); }
/**
 * Builds a Mockito answer that fabricates an {@link ObjectListing} for a list request.
 *
 * <p>The listing echoes the request's bucket and prefix, contains one 100-byte summary per
 * file name (key = request prefix + file name), and is marked truncated exactly when
 * {@code marker} is non-null, in which case {@code marker} is its next marker.
 *
 * @param marker    next marker to report, or {@code null} for a final page
 * @param fileNames names appended to the request prefix to form the keys
 * @return the answer producing such listings
 */
private Answer<ObjectListing> objectListingAnswer(@Nullable final String marker, final String... fileNames) {
    return new Answer<ObjectListing>() {
        @Override
        public ObjectListing answer(InvocationOnMock invocation) throws Throwable {
            ListObjectsRequest listRequest = (ListObjectsRequest) invocation.getArguments()[0];

            ObjectListing page = new ObjectListing();
            page.setBucketName(listRequest.getBucketName());
            page.setPrefix(listRequest.getPrefix());
            boolean hasMore = marker != null;
            page.setTruncated(hasMore);
            page.setNextMarker(marker);

            for (int i = 0; i < fileNames.length; i++) {
                S3ObjectSummary entry = new S3ObjectSummary();
                entry.setKey(listRequest.getPrefix() + fileNames[i]);
                entry.setSize(100);
                page.getObjectSummaries().add(entry);
            }
            return page;
        }
    };
}
@Test public void testCompressFile() throws Exception { String avroCodec = "snappy"; localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec); setUp(); task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME); List<SinkRecord> sinkRecords = createRecords(7); // Perform write task.put(sinkRecords); task.close(context.assignment()); task.stop(); List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3); for(S3ObjectSummary summary: summaries){ InputStream in = s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent(); DatumReader<Object> reader = new GenericDatumReader<>(); DataFileStream<Object> streamReader = new DataFileStream<>(in, reader); // make sure that produced Avro file has proper codec set Assert.assertEquals(avroCodec, streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG)); streamReader.close(); } long[] validOffsets = {0, 3, 6}; verify(sinkRecords, validOffsets); }
/**
 * Collects the summaries of all objects in {@code bucket}, optionally restricted to
 * {@code prefix}, following truncated listings page by page.
 *
 * <p>An {@code AmazonS3Exception} is logged as a warning and ends the listing; whatever
 * was collected up to that point is returned.
 *
 * @param bucket bucket to list
 * @param prefix key prefix, or {@code null} to list the whole bucket
 * @param s3     client to use
 * @return the summaries gathered (possibly empty)
 */
public static List<S3ObjectSummary> listObjects(String bucket, String prefix, AmazonS3 s3) {
    List<S3ObjectSummary> collected = new ArrayList<>();
    try {
        ObjectListing page = (prefix == null)
                ? s3.listObjects(bucket)
                : s3.listObjects(bucket, prefix);
        for (;;) {
            collected.addAll(page.getObjectSummaries());
            if (!page.isTruncated()) {
                break;
            }
            page = s3.listNextBatchOfObjects(page);
        }
    } catch (AmazonS3Exception e) {
        log.warn("listObjects for bucket '{}' prefix '{}' returned error code: {}", bucket, prefix, e.getStatusCode());
    }
    return collected;
}
@Override public void deleteByTenant(final String tenant) { final String folder = sanitizeTenant(tenant); LOG.info("Deleting S3 object folder (tenant) from bucket {} and key {}", s3Properties.getBucketName(), folder); // Delete artifacts ObjectListing objects = amazonS3.listObjects(s3Properties.getBucketName(), folder + "/"); do { for (final S3ObjectSummary objectSummary : objects.getObjectSummaries()) { amazonS3.deleteObject(s3Properties.getBucketName(), objectSummary.getKey()); } objects = amazonS3.listNextBatchOfObjects(objects); } while (objects.isTruncated()); }
@Override public Iterable<BinaryKey> getAllBinaryKeys() throws BinaryStoreException { try { final Iterator<S3ObjectSummary> objectsIterator = S3Objects.inBucket(s3Client, bucketName).iterator(); // Lambda to hand back BinaryKeys rather than S3ObjectSummaries return () -> { return new Iterator<BinaryKey>() { @Override public boolean hasNext() { return objectsIterator.hasNext(); } @Override public BinaryKey next() { S3ObjectSummary object = objectsIterator.next(); return new BinaryKey(object.getKey()); } }; }; } catch (AmazonClientException e) { throw new BinaryStoreException(e); } }
public static void main(String[] args) throws Exception { // create the AWS S3 Client AmazonS3 s3 = AWSS3Factory.getS3Client(); // delete the demo bucket and all its content for (S3ObjectSummary summary : s3.listObjects(AWSS3Factory.S3_BUCKET).getObjectSummaries()) { s3.deleteObject(AWSS3Factory.S3_BUCKET, summary.getKey()); System.out.println(String.format("Deleted [%s/%s]", AWSS3Factory.S3_BUCKET, summary.getKey())); } s3.deleteBucket(AWSS3Factory.S3_BUCKET); // print bucket key/value and content for validation System.out.println( String.format("deleted bucket [%s]", AWSS3Factory.S3_BUCKET)); }
/**
 * Prints the key of every object in the bucket named by the first argument.
 *
 * <p>Truncated listings are followed with {@code listNextBatchOfObjects}, so objects
 * beyond the first listing page are printed as well. Exits with status 1 and a usage
 * message when no bucket name is supplied.
 */
public static void main(String[] args) {
    final String USAGE = "\n" +
        "To run this example, supply the name of a bucket to list!\n" +
        "\n" +
        "Ex: ListObjects <bucket-name>\n";

    if (args.length < 1) {
        System.out.println(USAGE);
        System.exit(1);
    }

    String bucket_name = args[0];

    System.out.format("Objects in S3 bucket %s:\n", bucket_name);
    final AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    ObjectListing ol = s3.listObjects(bucket_name);
    while (true) {
        for (S3ObjectSummary os : ol.getObjectSummaries()) {
            System.out.println("* " + os.getKey());
        }
        if (!ol.isTruncated()) {
            break;
        }
        ol = s3.listNextBatchOfObjects(ol);
    }
}
@Test public void testGetKeysInPartialPath() { AmazonS3 client = mock(AmazonS3.class); S3StoreService service = new S3StoreService(client, S3_BUCKET, S3_PREFIX); String path = "path"; String key = "my-key"; S3ObjectSummary summary = new S3ObjectSummary(); summary.setKey(S3_PREFIX + key); ObjectListing listing = mock(ObjectListing.class); when(listing.getObjectSummaries()).thenReturn(Lists.newArrayList(summary)); when(client.listObjects(S3_BUCKET, S3_PREFIX + "/" + path)).thenReturn(listing); // invoke method under test Set<String> results = service.getKeysInPartialPath(path); assertEquals(1, results.size()); assertEquals(key, results.iterator().next()); }
/**
 * Subscribes to the processing result of one S3 object.
 *
 * <p>On completion or error the object is removed from {@code s3ObjectsInPipeline}; if it
 * was still present, {@code release()} is called (and, on completion only, the object is
 * reported to {@code processingState}). Errors print their stack trace when trace logging
 * is enabled. Emitted events are ignored.
 *
 * @param os     the S3 object being processed
 * @param result observable representing the processing of {@code os}
 */
private void subscribe(S3ObjectSummary os, Observable<? extends Object> result) {
    Subscriber<Object> completionTracker = new Subscriber<Object>() {
        @Override
        public void onNext(Object event) {
            // Individual events are of no interest here.
        }

        @Override
        public void onError(Throwable e) {
            boolean wasInPipeline = s3ObjectsInPipeline.remove(os);
            if (wasInPipeline) {
                release();
            }
            if (LOGGER.isTraceEnabled()) {
                e.printStackTrace();
            }
        }

        @Override
        public void onCompleted() {
            if (!s3ObjectsInPipeline.remove(os)) {
                return;
            }
            processingState.completed(os);
            release();
        }
    };
    result.subscribe(completionTracker);
}
/**
 * Test double for listing objects that also verifies the incoming request.
 *
 * <p>Counts invocations, asserts the bucket is {@code mycamelbucket} and, on the second
 * invocation, that the prefix is {@code confidential}. Returns a listing echoing the
 * request's bucket and prefix with a single summary whose key is {@code key}.
 */
@Override
public ObjectListing listObjects(ListObjectsRequest request) throws AmazonClientException, AmazonServiceException {
    final int invocation = requestCount.incrementAndGet();

    assertEquals("mycamelbucket", request.getBucketName());
    boolean isSecondCall = invocation == 2;
    if (isSecondCall) {
        assertEquals("confidential", request.getPrefix());
    }

    S3ObjectSummary onlyEntry = new S3ObjectSummary();
    onlyEntry.setBucketName(request.getBucketName());
    onlyEntry.setKey("key");

    ObjectListing listing = new ObjectListing();
    listing.setBucketName(request.getBucketName());
    listing.setPrefix(request.getPrefix());
    listing.getObjectSummaries().add(onlyEntry);
    return listing;
}
/**
 * Returns the keys of the objects in {@code bucket} whose key starts with {@code lockPrefix}.
 *
 * <p>Issues a single list request; the result is a transformed view over that listing's
 * object summaries.
 *
 * @param lockPrefix key prefix to list
 * @return the keys found by the list request
 */
@Override
protected List<String> getFileNames(String lockPrefix) throws Exception {
    Function<S3ObjectSummary, String> keyOf = new Function<S3ObjectSummary, String>() {
        @Override
        public String apply(S3ObjectSummary summary) {
            return summary.getKey();
        }
    };

    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bucket);
    listRequest.setPrefix(lockPrefix);

    ObjectListing listing = client.listObjects(listRequest);
    return Lists.transform(listing.getObjectSummaries(), keyOf);
}
/**
 * Lists the keys of the objects located directly inside each first-level "folder"
 * (common prefix with delimiter {@code "/"}) of the given bucket.
 *
 * <p>Both the first-level listing and each per-prefix listing follow the next marker
 * while the listing is truncated. Previously only the first page of each was read, so
 * prefixes or keys beyond it were silently omitted.
 *
 * @param bucketName bucket to inspect
 * @return keys of the objects found one level below the bucket root
 */
public List<String> listKeysInOutputBucket(String bucketName) {
    List<String> videoKeys = new ArrayList<>();

    ListObjectsRequest listFirstLevelKeyRequest = new ListObjectsRequest()
            .withBucketName(bucketName)
            .withDelimiter("/");

    // Fully qualified type so this block does not depend on an extra import.
    com.amazonaws.services.s3.model.ObjectListing firstLevel;
    do {
        firstLevel = s3client.listObjects(listFirstLevelKeyRequest);
        for (String commonPrefix : firstLevel.getCommonPrefixes()) {
            ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
                    .withBucketName(bucketName)
                    .withPrefix(commonPrefix)
                    .withDelimiter("/");

            com.amazonaws.services.s3.model.ObjectListing files;
            do {
                files = s3client.listObjects(listObjectsRequest);
                for (S3ObjectSummary file : files.getObjectSummaries()) {
                    videoKeys.add(file.getKey());
                }
                // Continue after the last entry of this page.
                listObjectsRequest.setMarker(files.getNextMarker());
            } while (files.isTruncated());
        }
        listFirstLevelKeyRequest.setMarker(firstLevel.getNextMarker());
    } while (firstLevel.isTruncated());

    return videoKeys;
}
/**
 * Lists two day-prefixes of the {@code noaa-nexrad-level2} bucket through the anonymous
 * interface and compares the number of returned summaries with the expected counts.
 */
@Test
public void testListBucket() {
    final String dataBucket = "noaa-nexrad-level2";
    final String[] key = {"2010/01/01", "2010/07/14"};
    final int[] answer = {14104, 33468};

    String jobID = null;
    AWSInterface awsInterface = new AWSInterface(jobID, null);
    AWSAnonInterface awsAnonInterface = new AWSAnonInterface();

    int[] output = new int[2];
    int i = 0;
    while (i < 2) {
        List<S3ObjectSummary> summaries = awsAnonInterface.ListBucket(dataBucket, key[i]);
        output[i] = summaries.size();
        summaries.clear();
        i++;
    }

    assertArrayEquals(answer, output);
    System.out.println("ListBucket() is ok");
}
@Test public void testValidateCopiedS3Files() throws IOException { // Create two lists of expected and actual storage files. // Please note we use different row count values to confirm that row count match is not validated. List<StorageFile> testExpectedFiles = new ArrayList<>(); List<S3ObjectSummary> testActualFiles = new ArrayList<>(); for (String file : LOCAL_FILES) { String filePath = String.format(String.format("%s/%s", TEST_S3_KEY_PREFIX, file)); testExpectedFiles.add(new StorageFile(filePath, FILE_SIZE, ROW_COUNT)); testActualFiles.add(createS3ObjectSummary(filePath, FILE_SIZE)); } // Validate the files. storageFileHelper.validateCopiedS3Files(testExpectedFiles, testActualFiles, STORAGE_NAME, new BusinessObjectDataKey(BDEF_NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION)); }
@Test public void testValidateCopiedS3FilesActualFileSizeMismatch() throws IOException { // Create two lists of expected and actual storage files, with expected file size not matching actual file size. List<StorageFile> testExpectedFiles = Arrays.asList(new StorageFile(TARGET_S3_KEY, FILE_SIZE, NO_ROW_COUNT)); List<S3ObjectSummary> testActualFiles = Arrays.asList(createS3ObjectSummary(TARGET_S3_KEY, FILE_SIZE_2)); // Try to validate S3 files when expected file size does not match actual file size. try { storageFileHelper.validateCopiedS3Files(testExpectedFiles, testActualFiles, STORAGE_NAME, new BusinessObjectDataKey(BDEF_NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION)); fail("Should throw an IllegalStateException when expected file size does not match actual file size."); } catch (IllegalStateException e) { assertEquals(String .format("Specified file size of %d bytes for copied \"%s\" S3 file in \"%s\" storage does not match file size of %d bytes reported by S3.", FILE_SIZE, TARGET_S3_KEY, STORAGE_NAME, FILE_SIZE_2), e.getMessage()); } }
@Test public void testValidateRegisteredS3Files() throws IOException { // Create two lists of expected and actual storage files. // Please note we use different row count values to confirm that row count match is not validated. List<StorageFile> testExpectedFiles = new ArrayList<>(); List<S3ObjectSummary> testActualFiles = new ArrayList<>(); for (String file : LOCAL_FILES) { String filePath = String.format(String.format("%s/%s", TEST_S3_KEY_PREFIX, file)); testExpectedFiles.add(new StorageFile(filePath, FILE_SIZE, ROW_COUNT)); testActualFiles.add(createS3ObjectSummary(filePath, FILE_SIZE)); } // Validate the files. storageFileHelper.validateRegisteredS3Files(testExpectedFiles, testActualFiles, STORAGE_NAME, new BusinessObjectDataKey(BDEF_NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION)); }
@Test public void testValidateRegisteredS3FilesUnexpectedS3FileFound() throws IOException { // Create two lists of expected and actual storage files, with an actual file not being added to the list of expected files. List<StorageFile> testExpectedFiles = new ArrayList<>(); List<S3ObjectSummary> testActualFiles = Arrays.asList(createS3ObjectSummary(TARGET_S3_KEY, FILE_SIZE_1_KB)); // Create a business object data key. BusinessObjectDataKey businessObjectDataKey = new BusinessObjectDataKey(BDEF_NAMESPACE, BDEF_NAME, FORMAT_USAGE_CODE, FORMAT_FILE_TYPE_CODE, FORMAT_VERSION, PARTITION_VALUE, SUBPARTITION_VALUES, DATA_VERSION); // Try to validate S3 files when unexpected S3 file exists. try { storageFileHelper.validateRegisteredS3Files(testExpectedFiles, testActualFiles, STORAGE_NAME, businessObjectDataKey); fail("Should throw an IllegalStateException when S3 contains unexpected S3 file."); } catch (IllegalStateException e) { assertEquals(String .format("Found unexpected S3 file \"%s\" in \"%s\" storage while validating registered S3 files. Business object data {%s}", TARGET_S3_KEY, STORAGE_NAME, businessObjectDataServiceTestHelper.getExpectedBusinessObjectDataKeyAsString(businessObjectDataKey)), e.getMessage()); } }
@Test public void testListDirectoryIgnoreZeroByteDirectoryMarkers() { // Create an S3 file transfer request parameters DTO. S3FileTransferRequestParamsDto s3FileTransferRequestParamsDto = new S3FileTransferRequestParamsDto(); // Create a list of S3 object summaries. List<S3ObjectSummary> s3ObjectSummaries = Arrays.asList(new S3ObjectSummary()); // Mock the external calls. when(s3Dao.listDirectory(s3FileTransferRequestParamsDto, true)).thenReturn(s3ObjectSummaries); // Call the method under test. List<S3ObjectSummary> result = s3Service.listDirectory(s3FileTransferRequestParamsDto, true); // Verify the external calls. verify(s3Dao).listDirectory(s3FileTransferRequestParamsDto, true); verifyNoMoreInteractions(s3Dao); // Validate the returned object. assertEquals(s3ObjectSummaries, result); }
/**
 * Looks up the record for {@code path} by listing objects whose key starts with the path
 * minus its first character, and describing the first object returned.
 *
 * @param path path to look up; its first character is dropped to form the key prefix
 * @return a record for the first listed object (marked as a directory when its key ends
 *         with {@code "/"}), or {@code Record.noFile(uri, path)} when the listing is empty
 */
@Override
@Nonnull
public Record getRecord(@Nonnull String path) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bucket);
    listRequest.setPrefix(path.substring(1));

    S3ObjectSummary first = s3.listObjects(listRequest)
            .getObjectSummaries()
            .stream()
            .findFirst()
            .orElse(null);
    if (first == null) {
        return Record.noFile(uri, path);
    }

    long modifiedMillis = first.getLastModified().getTime();
    long byteCount = first.getSize();
    boolean isDirectory = first.getKey().endsWith("/");
    return new Record(uri, RecordPath.from("/" + first.getKey()), modifiedMillis, byteCount, isDirectory);
}
/**
 * Lists the notes stored under {@code <user>/notebook} in {@code bucketName}.
 *
 * <p>Walks every listing page, calls {@code getNoteInfo} for each key ending in
 * {@code note.json}, and collects the non-null results.
 *
 * @param subject authentication info (not consulted by this method)
 * @return the note infos found
 * @throws IOException if S3 reports a client error while listing
 */
@Override
public List<NoteInfo> list(AuthenticationInfo subject) throws IOException {
    List<NoteInfo> infos = new LinkedList<>();
    try {
        ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
                .withBucketName(bucketName)
                .withPrefix(user + "/" + "notebook");

        boolean morePages = true;
        while (morePages) {
            ObjectListing page = s3client.listObjects(listObjectsRequest);
            for (S3ObjectSummary entry : page.getObjectSummaries()) {
                if (!entry.getKey().endsWith("note.json")) {
                    continue;
                }
                NoteInfo info = getNoteInfo(entry.getKey());
                if (info != null) {
                    infos.add(info);
                }
            }
            // Position the request after this page before checking for more.
            listObjectsRequest.setMarker(page.getNextMarker());
            morePages = page.isTruncated();
        }
    } catch (AmazonClientException ace) {
        throw new IOException("Unable to list objects in S3: " + ace, ace);
    }
    return infos;
}
/**
 * Returns the keys of all objects in the configured bucket whose key starts with
 * {@code folderName}.
 *
 * <p>Truncated listings are followed via the next marker. Previously only the first
 * listing page was returned. The existing debug output (number of summaries) is kept and
 * now printed once per page.
 *
 * @param folderName key prefix to list
 * @return the matching keys
 * @throws IOException declared by the signature; not thrown by the visible code
 */
public ArrayList<String> getAllChildren(String folderName) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(getBucketName());
    listRequest.setPrefix(folderName);

    ArrayList<String> list = new ArrayList<String>();
    ObjectListing listing;
    do {
        listing = s3.listObjects(listRequest);
        System.out.println(listing.getObjectSummaries().size());
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        // Continue after the last key of this page.
        listRequest.setMarker(listing.getNextMarker());
    } while (listing.isTruncated());
    return list;
}
/**
 * Returns the keys of all objects in {@code bucket}, restricted to those starting with
 * {@code folderName} when it is neither null nor empty.
 *
 * <p>Truncated listings are followed via the next marker. Previously only the first
 * listing page was returned.
 *
 * @param folderName key prefix, or null/empty to list the whole bucket
 * @param bucket     bucket to list
 * @return the matching keys
 * @throws IOException declared by the signature; not thrown by the visible code
 */
public List<String> getAllChildren(String folderName, String bucket) throws IOException {
    ListObjectsRequest listRequest = new ListObjectsRequest();
    listRequest.setBucketName(bucket);
    if (!(folderName == null || folderName.equals(""))) {
        listRequest.setPrefix(folderName);
    }

    ArrayList<String> list = new ArrayList<String>();
    ObjectListing listing;
    do {
        listing = s3.listObjects(listRequest);
        for (S3ObjectSummary summ : listing.getObjectSummaries()) {
            list.add(summ.getKey());
        }
        // Continue after the last key of this page.
        listRequest.setMarker(listing.getNextMarker());
    } while (listing.isTruncated());
    return list;
}