@Override
public Stream<WriteableConfiguration> getCachedConfigurations() throws IOException {
    Iterable<S3ObjectSummary> objectSummaries = S3Objects.withPrefix(s3, bucket, prefix);
    Stream<S3ObjectSummary> objectStream = StreamSupport.stream(objectSummaries.spliterator(), false);
    return objectStream
        .map(p -> {
            Integer version = getVersionIfMatch(p.getKey());
            if (version == null) {
                return null;
            }
            return new Pair<>(version, p);
        })
        .filter(Objects::nonNull)
        .sorted(Comparator.comparing(pair -> ((Pair<Integer, S3ObjectSummary>) pair).getFirst()).reversed())
        .map(pair -> new S3WritableConfiguration(s3, pair.getSecond(), Integer.toString(pair.getFirst())));
}
static void deletePrefix(AmazonS3 s3Client, String bucketName, String prefix) {
    List<DeleteObjectsRequest.KeyVersion> trashKeys = new ArrayList<>();
    DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(bucketName);
    for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucketName, prefix)) {
        trashKeys.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
        if (trashKeys.size() == BULK_DELETE_SIZE) {
            deleteObjectsRequest.setKeys(trashKeys);
            s3Client.deleteObjects(deleteObjectsRequest);
            trashKeys.clear();
        }
    }
    if (!trashKeys.isEmpty()) {
        deleteObjectsRequest.setKeys(trashKeys);
        s3Client.deleteObjects(deleteObjectsRequest);
    }
}
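// A minimal usage sketch, not from the source: how deletePrefix might be invoked,
// assuming BULK_DELETE_SIZE is defined elsewhere in the class (S3 accepts at most
// 1000 keys per DeleteObjects request, so values up to 1000 are typical). The
// bucket name and prefix below are hypothetical.
static void purgeStagingObjects(AmazonS3 s3Client) {
    deletePrefix(s3Client, "example-bucket", "staging/");
}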
@Override
protected void copy(RuntimeExecutionTrace source, RuntimeExecutionTrace target,
        RuntimeAnnotatedExecutionTrace absoluteSource, RuntimeAnnotatedExecutionTrace absoluteTarget)
        throws IOException {
    String sourcePrefix = toS3Path(source).getPrefixForChildren();
    String targetPrefix = toS3Path(target).getPrefixForChildren();
    for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucketName, sourcePrefix)) {
        if (summary.getKey().startsWith(sourcePrefix)) {
            String relativeKey = summary.getKey().substring(sourcePrefix.length());
            s3Client.copyObject(bucketName, summary.getKey(), bucketName, targetPrefix + relativeKey);
        } else {
            log.error(String.format(
                "S3Objects.withPrefix() returned unexpected key '%s' when asked for prefix '%s'.",
                summary.getKey(), sourcePrefix
            ));
        }
    }
}
@Override
public Iterable<BinaryKey> getAllBinaryKeys() throws BinaryStoreException {
    try {
        final Iterator<S3ObjectSummary> objectsIterator =
            S3Objects.inBucket(s3Client, bucketName).iterator();
        // Lambda to hand back BinaryKeys rather than S3ObjectSummaries
        return () -> new Iterator<BinaryKey>() {
            @Override
            public boolean hasNext() {
                return objectsIterator.hasNext();
            }

            @Override
            public BinaryKey next() {
                S3ObjectSummary object = objectsIterator.next();
                return new BinaryKey(object.getKey());
            }
        };
    } catch (AmazonClientException e) {
        throw new BinaryStoreException(e);
    }
}
private static List<Record> createRecordsForWholeFileFromS3() throws Exception {
    Iterator<S3ObjectSummary> s3ObjectSummaryIterator =
        S3Objects.inBucket(s3client, SOURCE_BUCKET_NAME).iterator();
    List<Record> records = new ArrayList<>();
    while (s3ObjectSummaryIterator.hasNext()) {
        S3ObjectSummary s3ObjectSummary = s3ObjectSummaryIterator.next();
        Map<String, Object> metadata =
            getS3Metadata(s3client.getObject(SOURCE_BUCKET_NAME, s3ObjectSummary.getKey()));
        Record record = RecordCreator.create();
        record.getHeader().setAttribute("bucket", TARGET_BUCKET_NAME);
        record.set(
            FileRefUtil.getWholeFileRecordRootField(
                new S3FileRef.Builder()
                    .s3Client(s3client)
                    .s3ObjectSummary(s3ObjectSummary)
                    .useSSE(false)
                    .verifyChecksum(false)
                    .bufferSize(1024)
                    .build(),
                metadata
            )
        );
        records.add(record);
    }
    return records;
}
public static void main(String[] args) throws Exception {
    DeleteBucketRequest deleteBucketRequest = new DeleteBucketRequest(AWSResources.S3_BUCKET_NAME);
    if (AWSResources.S3.doesBucketExist(AWSResources.S3_BUCKET_NAME)) {
        try {
            AWSResources.S3.deleteBucket(deleteBucketRequest);
            System.out.println("Bucket Deleted...");
        } catch (AmazonS3Exception ex) {
            if (!ex.getErrorCode().equals("BucketNotEmpty")) {
                throw ex;
            }
        }
    }
    // Delete any remaining objects individually, then delete the (now empty) bucket.
    for (S3ObjectSummary obj : S3Objects.withPrefix(AWSResources.S3, AWSResources.S3_BUCKET_NAME, "")) {
        AWSResources.S3.deleteObject(AWSResources.S3_BUCKET_NAME, obj.getKey());
    }
    AWSResources.S3.deleteBucket(deleteBucketRequest);
    if (!AWSResources.S3.doesBucketExist(AWSResources.S3_BUCKET_NAME)) {
        System.out.println("Deleted");
    }
}
public TaxiEventReader(AmazonS3 s3, String bucketName, String prefix) {
    this.s3 = s3;
    this.s3Objects = S3Objects.withPrefix(s3, bucketName, prefix).iterator();

    // initialize next and hasNext fields
    next();
}
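// Not shown in the source: a plausible look-ahead pattern implied by the constructor
// priming itself with next(). Only the buffered next/hasNext fields are suggested by
// the original comment; the Event type and the parseEvent helper are hypothetical.
public boolean hasNext() {
    return hasNext;
}

public Event next() {
    Event current = next;                 // element buffered by the previous call
    if (s3Objects.hasNext()) {
        S3ObjectSummary summary = s3Objects.next();
        next = parseEvent(summary);       // hypothetical helper that reads the object's content
        hasNext = true;
    } else {
        next = null;
        hasNext = false;
    }
    return current;
}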
@Override
public Stream<BlobId> getBlobIdStream() {
    Iterable<S3ObjectSummary> summaries = S3Objects.withPrefix(s3, getConfiguredBucket(), CONTENT_PREFIX);
    return StreamSupport.stream(summaries.spliterator(), false)
        .map(S3ObjectSummary::getKey)
        .map(key -> key.substring(key.lastIndexOf('/') + 1))
        .filter(filename -> filename.endsWith(BLOB_ATTRIBUTE_SUFFIX)
            && !filename.startsWith(TEMPORARY_BLOB_ID_PREFIX))
        .map(filename -> filename.substring(0, filename.length() - BLOB_ATTRIBUTE_SUFFIX.length()))
        .map(BlobId::new);
}
static S3ObjectSummary getObjectSummary(AmazonS3 s3Client, String bucket, String objectKey) {
    S3ObjectSummary s3ObjectSummary = null;
    S3Objects s3ObjectSummaries = S3Objects.withPrefix(s3Client, bucket, objectKey);
    for (S3ObjectSummary s : s3ObjectSummaries) {
        if (s.getKey().equals(objectKey)) {
            s3ObjectSummary = s;
            break;
        }
    }
    return s3ObjectSummary;
}
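// A minimal usage sketch, not from the source: getObjectSummary returns null when no
// object has exactly the requested key, so callers should handle that case. The
// method name, bucket, and key below are hypothetical.
static long requireObjectSize(AmazonS3 s3Client, String bucket, String key) {
    S3ObjectSummary summary = getObjectSummary(s3Client, bucket, key);
    if (summary == null) {
        throw new IllegalStateException("Object not found: " + key);
    }
    return summary.getSize();
}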
private int getObjectCount(AmazonS3 s3Client, String bucket) {
    int count = 0;
    for (S3ObjectSummary ignored : S3Objects.inBucket(s3Client, bucket)) {
        count++;
    }
    return count;
}
private int getObjectCount(AmazonS3 s3Client, String bucket) {
    int count = 0;
    for (S3ObjectSummary s : S3Objects.inBucket(s3Client, bucket)) {
        count++;
    }
    return count;
}
private Map<Pair<String, String>, S3ObjectSummary> getObjectSummaries(AmazonS3 s3Client, String bucket, String prefix) {
    Map<Pair<String, String>, S3ObjectSummary> s3ObjectSummaries = new HashMap<>();
    for (S3ObjectSummary s : S3Objects.withPrefix(s3Client, bucket, prefix)) {
        s3ObjectSummaries.put(Pair.of(bucket, s.getKey()), s);
    }
    return s3ObjectSummaries;
}
private int getObjectCount(AmazonS3 s3Client, String bucket, String prefix) {
    int count = 0;
    for (S3ObjectSummary ignored : S3Objects.withPrefix(s3Client, bucket, prefix)) {
        count++;
    }
    return count;
}
public static void createBucket(AmazonS3 s3client, String bucketName) {
    if (s3client.doesBucketExist(bucketName)) {
        for (S3ObjectSummary s : S3Objects.inBucket(s3client, bucketName)) {
            s3client.deleteObject(bucketName, s.getKey());
        }
        s3client.deleteBucket(bucketName);
    }
    Assert.assertFalse(s3client.doesBucketExist(bucketName));
    // Note that CreateBucketRequest does not specify a region, so the bucket is
    // created in the region the client is configured for.
    s3client.createBucket(new CreateBucketRequest(bucketName));
}
private int verifyAndReturnNoOfObjects() throws Exception {
    int numberOfObjects = 0;
    for (S3ObjectSummary s3ObjectSummary : S3Objects.inBucket(s3client, TARGET_BUCKET_NAME)) {
        String fileNameOrKey = s3ObjectSummary.getKey();
        if (withFileNamePrefix) {
            // strip out the file prefix "sdc-"
            fileNameOrKey = fileNameOrKey.substring(4);
        }
        switch (source) {
            case LOCAL:
                verifyStreamCorrectness(
                    new FileInputStream(testDir.getAbsolutePath() + "/" + fileNameOrKey),
                    s3client.getObject(TARGET_BUCKET_NAME, s3ObjectSummary.getKey()).getObjectContent()
                );
                break;
            case S3:
                verifyStreamCorrectness(
                    s3client.getObject(SOURCE_BUCKET_NAME, fileNameOrKey).getObjectContent(),
                    s3client.getObject(TARGET_BUCKET_NAME, s3ObjectSummary.getKey()).getObjectContent()
                );
                break;
        }
        deleteObjectsAfterVerificationInTarget(s3ObjectSummary.getKey());
        numberOfObjects++;
    }
    return numberOfObjects;
}
@Test
public void createsFile() {
    List<String> names = newArrayList(S3Objects.inBucket(subject.getAmazonS3Client(), "bucket-name").iterator())
        .stream()
        .map(S3ObjectSummary::getKey)
        .sorted()
        .collect(Collectors.toList());
    assertThat(names, is(Arrays.asList("fileName", "fileName2")));
}
private void start() throws IOException, InterruptedException {
    AmazonS3Client s3Client = new AmazonS3Client();
    S3Objects s3Objects = S3Objects.withPrefix(s3Client, config.getS3Bucket(), config.getS3Key());
    final ElasticWriter elasticWriter =
        new ElasticWriter(config.getElasticIndexName(), config.getElasticHost(), config.getElasticPort());
    int count = 0;
    for (S3ObjectSummary s3ObjectSummary : s3Objects) {
        S3Object object = s3Client.getObject(s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey());
        log.info("Downloading content of: {}", object.getKey());
        S3ObjectInputStream objectContentStream = object.getObjectContent();
        BufferedReader reader = new BufferedReader(new InputStreamReader(objectContentStream));
        String line;
        while ((line = reader.readLine()) != null) {
            // Each line is expected to be tab-separated: two string terms followed by an integer value.
            Scanner scanner = new Scanner(line).useDelimiter("\t");
            String t1 = scanner.next();
            String t2 = scanner.next();
            Integer value = scanner.nextInt();
            elasticWriter.write(t1, t2, value);
            count++;
        }
        log.info("Wrote {} pairs", count);
    }
}
@Override
public Stream<BlobId> getBlobIdStream() {
    Iterable<S3ObjectSummary> summaries = S3Objects.withPrefix(s3, getConfiguredBucket(), CONTENT_PREFIX);
    return blobIdStream(summaries);
}
@Override
public Stream<BlobId> getDirectPathBlobIdStream(final String prefix) {
    String subpath = format("%s/%s/%s", CONTENT_PREFIX, DIRECT_PATH_ROOT, prefix);
    Iterable<S3ObjectSummary> summaries = S3Objects.withPrefix(s3, getConfiguredBucket(), subpath);
    return blobIdStream(summaries);
}
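// blobIdStream(...) is not shown here. A plausible sketch, assuming it mirrors the
// inline mapping in the earlier getBlobIdStream() example (key -> file name -> strip
// suffix -> BlobId); this is an inference, not the library's confirmed implementation:
private Stream<BlobId> blobIdStream(Iterable<S3ObjectSummary> summaries) {
    return StreamSupport.stream(summaries.spliterator(), false)
        .map(S3ObjectSummary::getKey)
        .map(key -> key.substring(key.lastIndexOf('/') + 1))
        .filter(filename -> filename.endsWith(BLOB_ATTRIBUTE_SUFFIX)
            && !filename.startsWith(TEMPORARY_BLOB_ID_PREFIX))
        .map(filename -> filename.substring(0, filename.length() - BLOB_ATTRIBUTE_SUFFIX.length()))
        .map(BlobId::new);
}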
/**
 * Lists objects from Amazon S3 in chronological order (lexicographical order if two files have the same
 * timestamp) that are later than or equal to the timestamp of the previous offset object.
 *
 * @param s3Client
 * @param s3ConfigBean
 * @param pathMatcher glob patterns to match file name against
 * @param s3Offset current offset which provides the timestamp of the previous object
 * @param fetchSize number of objects to fetch in one go
 * @return
 * @throws AmazonClientException
 */
static List<S3ObjectSummary> listObjectsChronologically(
    AmazonS3 s3Client,
    S3ConfigBean s3ConfigBean,
    AntPathMatcher pathMatcher,
    AmazonS3Source.S3Offset s3Offset,
    int fetchSize
) {
    // Algorithm:
    //  - Fully scan all objects that match the file name pattern and are later than the file in the offset.
    //  - Select the oldest "fetchSize" number of files and return them.
    TreeSet<S3ObjectSummary> treeSet = new TreeSet<>((o1, o2) -> {
        int result = o1.getLastModified().compareTo(o2.getLastModified());
        if (result != 0) {
            return result;
        }
        // Same modified time: fall back to lexicographical order on the key.
        return o1.getKey().compareTo(o2.getKey());
    });

    S3Objects s3ObjectSummaries = S3Objects
        .withPrefix(s3Client, s3ConfigBean.s3Config.bucket, s3ConfigBean.s3Config.commonPrefix)
        .withBatchSize(BATCH_SIZE);

    for (S3ObjectSummary s : s3ObjectSummaries) {
        String fullPrefix = s.getKey();
        String remainingPrefix = fullPrefix.substring(s3ConfigBean.s3Config.commonPrefix.length());
        if (!remainingPrefix.isEmpty()) {
            // remainingPrefix can be empty.
            // If the user manually creates a prefix "myFolder/mySubFolder" in bucket "myBucket" and uploads
            // "myObject", then the first objects returned here are:
            //   myFolder/mySubFolder
            //   myFolder/mySubFolder/myObject
            // All is good when the pipeline is run, but preview returns no data, so we ignore the empty
            // "folder" object as it has no data.
            if (pathMatcher.match(s3ConfigBean.s3FileConfig.prefixPattern, remainingPrefix) && isEligible(s, s3Offset)) {
                treeSet.add(s);
            }
            if (treeSet.size() > fetchSize) {
                treeSet.pollLast();
            }
        }
    }
    return new ArrayList<>(treeSet);
}
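// isEligible(...) is not shown in this example. Based only on the Javadoc above
// ("later than or equal to the timestamp of the previous offset object"), a sketch
// might look like the following; the S3Offset accessor and its epoch-millisecond
// representation are assumptions, not the actual API.
private static boolean isEligible(S3ObjectSummary summary, AmazonS3Source.S3Offset offset) {
    // With no previous offset, every matching object is eligible.
    if (offset == null) {
        return true;
    }
    // Eligible if the object was modified at or after the previously recorded timestamp.
    return summary.getLastModified().getTime() >= offset.getTimestamp();
}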
public static void main(String[] args) throws Exception {
    for (S3ObjectSummary summary : S3Objects.withPrefix(AWSResources.S3, AWSResources.S3_BUCKET_NAME, "photos/")) {
        System.out.printf("Object with key '%s'\n", summary.getKey());
    }
}