private String regionForUri(AmazonS3 client, AmazonS3URI uri) { String bucketRegion = client.getBucketLocation(uri.getBucket()); Region region = Region.fromValue(bucketRegion); // S3 doesn't have a US East 1 region, US East 1 is really the region // US Standard. US Standard places the data in either an east coast // or west coast data center geographically closest to you. // SigV4 requires you to mention a region while signing a request // and for the S3's US standard endpoints the value to be used is "us-east-1" // US West 1 has an endpoint and so is treated as a stand alone region, // US East 1 doesn't and so is bundled into US Standard if (region.equals(Region.US_Standard)) { bucketRegion = "us-east-1"; } else { bucketRegion = region.toString(); } return bucketRegion; }
/**
 * Creates the source and target clients plus the transfer manager, then submits copy jobs:
 * one for the whole base location when no sub-locations are configured, otherwise one per
 * sub-location, mapped onto the matching path under the replica location.
 */
private void startAllCopyJobs() {
  AmazonS3URI sourceBase = toAmazonS3URI(sourceBaseLocation.toUri());
  AmazonS3URI targetBase = toAmazonS3URI(replicaLocation.toUri());
  srcClient = s3ClientFactory.newInstance(sourceBase, s3s3CopierOptions);
  targetClient = s3ClientFactory.newInstance(targetBase, s3s3CopierOptions);
  transferManager = transferManagerFactory.newInstance(targetClient, s3s3CopierOptions);

  if (sourceSubLocations.isEmpty()) {
    copy(sourceBase, targetBase);
    return;
  }

  for (Path subPath : sourceSubLocations) {
    AmazonS3URI subLocation = toAmazonS3URI(subPath.toUri());
    // Key of the sub-location relative to the source base, without a leading slash.
    String relativeKey = StringUtils.removeStart(subLocation.getKey(), sourceBase.getKey());
    relativeKey = StringUtils.removeStart(relativeKey, "/");
    Path replicaSubPath = new Path(replicaLocation, relativeKey);
    AmazonS3URI targetS3Uri = toAmazonS3URI(replicaSubPath.toUri());
    LOG.debug("Starting copyJob from {} to {}", subLocation, targetS3Uri);
    copy(subLocation, targetS3Uri);
  }
}
/**
 * Downloads the S3 object identified by {@code uri} and exposes its content as a stream of
 * lines.
 *
 * <p>The returned stream holds the underlying S3 object content stream open. Callers should
 * close the returned stream (for example with try-with-resources) so that the content stream
 * is released; closing it closes the reader wrapped around the object content.
 *
 * @return a stream over the lines of the object
 */
@Override
public Stream<String> lines() {
  LOGGER.debug("starting download from {}", uri);
  AmazonS3URI s3URI = new AmazonS3URI(uri);
  S3Object s3Object = s3Client.getObject(s3URI.getBucket(), s3URI.getKey());
  InputStream stream = s3Object.getObjectContent();
  // NOTE(review): the reader uses the platform default charset, as before - confirm that
  // this matches the encoding of the stored objects.
  BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
  // Previously nothing ever closed the content stream; tie its lifetime to the Stream.
  return reader.lines().onClose(() -> {
    try {
      reader.close();
    } catch (java.io.IOException e) {
      throw new java.io.UncheckedIOException(e);
    }
  });
}
/**
 * Seeks and downloads the index for the given BAM URI.
 * Uses the custom index URI when one is provided; otherwise looks for an index file
 * with the same name next to the BAM file.
 *
 * @param bamURI S3 URI of the BAM file.
 * @return an Optional holding an in-memory SeekableStream over the index, or empty when no
 *         index could be located
 * @throws IOException if the downloaded byte count differs from the reported file size, or
 *         reading/closing the download stream fails
 */
Optional<SeekableStream> loadIndex(AmazonS3URI bamURI) throws IOException {
  LOG.info("Trying to set index file for " + bamURI.toString());
  Optional<AmazonS3URI> index = providedIndexURI()
      .map(Optional::of)
      .orElseGet(() -> nearbyIndexURI(bamURI));

  if (!index.isPresent()) {
    LOG.info("Index wasn't provided for " + bamURI.toString());
    return Optional.empty();
  }

  AmazonS3URI indexURI = index.get();
  LOG.info("Start download index: " + indexURI);
  S3InputStreamFactory streamFactory = new S3InputStreamFactory(client);

  final long fileSize;
  final byte[] buffer;
  // The download stream was previously never closed; release it as soon as it is drained.
  try (InputStream stream = streamFactory.loadFully(indexURI)) {
    fileSize = client.getFileSize(indexURI);
    buffer = IOUtils.toByteArray(stream);
  }

  if (fileSize != buffer.length) {
    throw new IOException("Failed to fully download index " + indexURI);
  }

  LOG.info("Finished download index: " + indexURI);
  return Optional.of(new SeekableMemoryStream(buffer, indexURI.toString()));
}
/** * A method that creates a SamReader object that's passed on to the HTSJDK. * Each time someone tries to open a SamReader on an URL, * HTSJDK checks if there's a custom reader factory and if it's there, this method is called. * * @param url target file URL * @return A SamReader object on a specified file URL */ @Override public SamReader open(URL url) { PerformanceMonitor.start(); AmazonS3URI amazonURI = new AmazonS3URI(url.toString()); S3Client client = new S3Client(); S3InputStreamFactory streamFactory = new S3InputStreamFactory(client); //download index file if is possible, and then start download .bam file final Optional<SeekableStream> indexStream; try { IndexLoader loader = new IndexLoader(client); indexStream = loader.loadIndex(amazonURI); } catch (IOException e) { throw new RuntimeIOException(e.getMessage() + " failed to download index", e); } SeekableStream stream = new S3SeekableStream(amazonURI, client, streamFactory); SamReaderFactory factory = SamReaderFactory.makeDefault(); SamInputResource inputResource = SamInputResource.of(stream); indexStream.ifPresent(inputResource::index); return factory.open(inputResource); }
/**
 * Checks whether object metadata can be fetched for the given S3 URI.
 *
 * @param uri The provided URI for the file.
 * @return {@code true} when the metadata request succeeds; {@code false} when S3 answers
 *         with status 403 (forbidden) or 404 (not found). Any other S3 error is rethrown.
 */
boolean isFileExisting(AmazonS3URI uri) {
  try {
    aws.getObjectMetadata(uri.getBucket(), uri.getKey());
    return true;
  } catch (AmazonS3Exception e) {
    final int status = e.getStatusCode();
    final boolean missingOrDenied =
        status == HttpStatus.SC_FORBIDDEN || status == HttpStatus.SC_NOT_FOUND;
    if (!missingOrDenied) {
      throw e;
    }
    return false;
  }
}
/**
 * Stubs {@code factory.loadFromTo(..)} so that every call answers with a fresh stream that
 * yields {@code FROM_STREAM_CHAR} for the first {@code dataSize} reads and {@code -1}
 * afterwards. Each read is logged at debug level.
 *
 * @param factory the factory mock to stub
 * @param dataSize number of data bytes each produced stream returns
 */
public static void mockPrimitiveLoadFromTo(S3InputStreamFactory factory, int dataSize) {
  PowerMockito.mockStatic(S3InputStreamFactory.class);
  PowerMockito
      .when(factory.loadFromTo(
          Mockito.any(AmazonS3URI.class),
          Mockito.anyLong(),
          Mockito.anyLong()))
      .then((invocation) -> new InputStream() {
        final Log log = Log.getInstance(InputStream.class);
        private int size = dataSize;

        @Override
        public int read() throws IOException {
          // Decide on the remaining count before decrementing, then log the new count.
          final boolean hasData = size > 0;
          size--;
          final int returnByte = hasData ? FROM_STREAM_CHAR : -1;
          log.debug("Stream(", this.hashCode(), " size = ", size, " return: ", returnByte);
          return returnByte;
        }
      });
}
/**
 * Stubs {@code factory.loadFromTo(uri, from, to)} so that every call answers with a stream
 * whose reads return the successive values {@code from, from + 1, ...} (cast to int) while
 * the position is below {@code to}, and {@code END_BYTE} afterwards.
 *
 * @param factory the factory mock to stub
 */
public static void mockAutoSeqLoadFromTo(S3InputStreamFactory factory) {
  PowerMockito.mockStatic(S3InputStreamFactory.class);
  PowerMockito
      .when(factory.loadFromTo(
          Mockito.any(AmazonS3URI.class),
          Mockito.anyLong(),
          Mockito.anyLong()))
      .then((invocation) -> new InputStream() {
        long from = (Long) invocation.getArguments()[1];
        final long to = (Long) invocation.getArguments()[2];

        @Override
        public int read() throws IOException {
          if (from >= to) {
            return END_BYTE;
          }
          final int current = (int) from;
          from++;
          return current;
        }
      });
}
/**
 * Stores the operation config and builds the database reader from the database file that
 * the config's GeoLite DB S3 URI points at.
 *
 * @param config expected to be a {@link GeoIpOperationConfig}
 * @throws ConfigurationException if the database cannot be read from S3
 */
@Override
public void setConf(AbstractConfig config) {
  this.config = (GeoIpOperationConfig) config;
  AmazonS3Client client = this.s3Factory.newInstance();

  AmazonS3URI uri = new AmazonS3URI(this.config.getGeoLiteDb());
  GetObjectRequest req = new GetObjectRequest(uri.getBucket(), uri.getKey());

  // Close the S3 object once the reader is built; previously it was left open.
  // NOTE(review): assumes DatabaseReader.Builder(InputStream).build() reads the whole
  // stream into memory, so the stream is not needed after build() - confirm.
  try (S3Object obj = client.getObject(req)) {
    this.databaseReader =
        new DatabaseReader.Builder(obj.getObjectContent()).withCache(new CHMCache()).build();
  } catch (IOException e) {
    throw new ConfigurationException("Unable to read " + this.config.getGeoLiteDb(), e);
  }
}
/**
 * Loads a {@link BenderConfig} from a file stored in S3.
 *
 * @param s3ClientFactory factory used to obtain the S3 client
 * @param s3Uri location of the config file
 * @return the parsed config, with its config file set to the string form of the S3 URI
 * @throws ConfigurationException if the file content cannot be read
 */
public static BenderConfig load(AmazonS3ClientFactory s3ClientFactory, AmazonS3URI s3Uri) {
  AmazonS3Client s3 = s3ClientFactory.newInstance();

  StringWriter writer = new StringWriter();
  // try-with-resources releases the S3 object; it was previously never closed.
  try (S3Object s3object = s3.getObject(s3Uri.getBucket(), s3Uri.getKey())) {
    IOUtils.copy(s3object.getObjectContent(), writer, "UTF-8");
  } catch (IOException e) {
    throw new ConfigurationException("Unable to read file from s3", e);
  }

  BenderConfig config = load(s3Uri.getKey(), writer.toString());
  config.setConfigFile(s3Uri.getURI().toString());

  return config;
}
/** * Parse string representing an AWS S3 URI */ protected void parse(String s3UriStr) { s3uri = new AmazonS3URI(s3UriStr); bucketName = s3uri.getBucket(); key = s3uri.getKey(); // Parse and set region String regionStr = s3uri.getRegion(); try { // if (regionStr == null) regionStr = Config.get().getString(Config.AWS_REGION, DEFAULT_AWS_REGION); // region = Region.getRegion(Regions.valueOf(regionStr.toUpperCase())); if (regionStr != null) region = Region.getRegion(Regions.valueOf(regionStr.toUpperCase())); } catch (Exception e) { throw new RuntimeException("Cannot parse AWS region '" + regionStr + "'", e); } }
/**
 * Splits a source and a destination S3 URI into their bucket and key parts.
 *
 * @param inputFilePath S3 URI of the source; provides the source bucket and key
 * @param outputFilePath S3 URI of the destination; provides the destination bucket and
 *        the destination prefix (the URI's key)
 */
public BucketKey(String inputFilePath, String outputFilePath) {
  final AmazonS3URI source = new AmazonS3URI(inputFilePath);
  final AmazonS3URI destination = new AmazonS3URI(outputFilePath);

  this.srcBucket = source.getBucket();
  this.srcKey = source.getKey();

  this.destBucket = destination.getBucket();
  this.destPrefix = destination.getKey();
}
/**
 * Reads a file from S3 into a String object
 * @param s3Uri (eg. s3://bucket/file.ext)
 * @return String containing the content of the file in S3
 * @throws IOException if error reading file
 */
public String readFileFromS3(String s3Uri) throws IOException {
  AmazonS3URI s3FileUri = new AmazonS3URI(s3Uri);
  GetObjectRequest request = new GetObjectRequest(s3FileUri.getBucket(), s3FileUri.getKey());
  // try-with-resources releases the S3 object once the content has been read; it was
  // previously never closed.
  try (S3Object s3object = amazonS3Client.getObject(request)) {
    return IOUtils.toString(s3object.getObjectContent());
  }
}
public static AmazonS3URI toAmazonS3URI(URI uri) { if (FS_PROTOCOL_S3.equalsIgnoreCase(uri.getScheme())) { return new AmazonS3URI(uri); } else if (S3Schemes.isS3Scheme(uri.getScheme())) { try { return new AmazonS3URI(new URI(FS_PROTOCOL_S3, uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), uri.getQuery(), uri.getFragment())); } catch (URISyntaxException e) { // ignore it, it will fail on the return } } // Build it anyway we'll get AmazonS3URI exception back. return new AmazonS3URI(uri); }
/**
 * Creates a client for the region of the bucket referenced by {@code uri}. When an
 * {@link IllegalArgumentException} is raised while resolving the region or creating the
 * regional client, the global (non region specific) client is returned instead.
 *
 * @param uri S3 URI whose bucket determines the region
 * @param s3s3CopierOptions copier options passed on to client creation
 * @return a region specific client, or the global client as a fallback
 */
@Override
public AmazonS3 newInstance(AmazonS3URI uri, S3S3CopierOptions s3s3CopierOptions) {
  LOG.debug("trying to get a client for uri '{}'", uri);
  final AmazonS3 globalClient = newGlobalInstance(s3s3CopierOptions);
  AmazonS3 selected;
  try {
    final String bucketRegion = regionForUri(globalClient, uri);
    LOG.debug("Bucket region: {}", bucketRegion);
    selected = newInstance(bucketRegion, s3s3CopierOptions);
  } catch (IllegalArgumentException e) {
    LOG.warn("Using global (non region specific) client", e);
    selected = globalClient;
  }
  return selected;
}
/**
 * Lists every object under the source bucket/prefix, page by page, and submits copy jobs
 * for each page of the listing.
 *
 * @param source bucket and key prefix to list
 * @param target bucket and key prefix to copy to
 */
private void copy(AmazonS3URI source, AmazonS3URI target) {
  ListObjectsRequest request = listObjectsRequestFactory
      .newInstance()
      .withBucketName(source.getBucket())
      .withPrefix(source.getKey());

  ObjectListing page = srcClient.listObjects(request);
  for (;;) {
    submitCopyJobsFromListing(source, target, request, page);
    if (!page.isTruncated()) {
      break;
    }
    page = srcClient.listNextBatchOfObjects(page);
  }
}
private void submitCopyJobsFromListing( AmazonS3URI sourceS3Uri, final AmazonS3URI targetS3Uri, ListObjectsRequest request, ObjectListing listing) { LOG.debug("Found objects to copy {}, for request {}/{}", listing.getObjectSummaries(), request.getBucketName(), request.getPrefix()); List<S3ObjectSummary> objectSummaries = listing.getObjectSummaries(); for (final S3ObjectSummary s3ObjectSummary : objectSummaries) { String fileName = StringUtils.removeStart(s3ObjectSummary.getKey(), sourceS3Uri.getKey()); final String targetKey = Strings.nullToEmpty(targetS3Uri.getKey()) + fileName; LOG.info("copying object from '{}/{}' to '{}/{}'", s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey(), targetS3Uri.getBucket(), targetKey); CopyObjectRequest copyObjectRequest = new CopyObjectRequest(s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey(), targetS3Uri.getBucket(), targetKey); TransferStateChangeListener stateChangeListener = new TransferStateChangeListener() { @Override public void transferStateChanged(Transfer transfer, TransferState state) { if (state == TransferState.Completed) { // NOTE: running progress doesn't seem to be reported correctly. // transfer.getProgress().getBytesTransferred() is always 0. Unsure what is the cause of this at this moment // so just printing total bytes when completed. LOG.debug("copied object from '{}/{}' to '{}/{}': {} bytes transferred", s3ObjectSummary.getBucketName(), s3ObjectSummary.getKey(), targetS3Uri.getBucket(), targetKey, transfer.getProgress().getTotalBytesToTransfer()); } } }; Copy copy = transferManager.copy(copyObjectRequest, srcClient, stateChangeListener); totalBytesToReplicate += copy.getProgress().getTotalBytesToTransfer(); copyJobs.add(copy); } }
/**
 * Writes the input fixture file, creates the "source" and "target" buckets and makes the
 * client factory mock hand out a newly created client.
 */
@Before
public void setUp() throws Exception {
  inputData = temp.newFile("data");
  Files.write("bar foo", inputData, Charsets.UTF_8);

  client = newClient();
  for (String bucketName : new String[] { "source", "target" }) {
    client.createBucket(bucketName);
  }

  when(s3ClientFactory.newInstance(any(AmazonS3URI.class), any(S3S3CopierOptions.class)))
      .thenReturn(newClient());
}
/**
 * Creates an S3 client for the given table URI, with the S3 endpoint option set to the
 * proxy URL reported by {@code s3Proxy}.
 *
 * @param tableUri URI of the table location
 * @return a client obtained from {@code s3ClientFactory}
 */
private AmazonS3 newS3Client(String tableUri) {
  AmazonS3URI base = toAmazonS3URI(URI.create(tableUri));
  ImmutableMap<String, Object> endpointOverride = ImmutableMap
      .<String, Object> builder()
      .put(S3S3CopierOptions.Keys.S3_ENDPOINT_URI.keyName(), s3Proxy.getProxyUrl())
      .build();
  S3S3CopierOptions copierOptions = new S3S3CopierOptions(endpointOverride);
  return s3ClientFactory.newInstance(base, copierOptions);
}
/**
 * Stores the range, source and task queue, then hands this loader to the tasks executor
 * obtained from {@code ExecutorsFactory}.
 *
 * @param uri S3 URI of the object to load
 * @param from start position of the range
 * @param to end position of the range
 * @param factory factory for the input streams
 * @param tasksQueue queue of the part-download futures
 */
ParallelPartsLoader(AmazonS3URI uri,
                    long from,
                    long to,
                    S3InputStreamFactory factory,
                    BlockingQueue<Future<Optional<byte[]>>> tasksQueue) {
  this.threadPool = ExecutorsFactory.getTasksExecutor();
  this.uri = uri;
  this.from = from;
  this.to = to;
  this.factory = factory;
  this.tasksQueue = tasksQueue;
  // Submit only after every field has been assigned.
  this.threadPool.execute(this);
}
/**
 * Creates the parts loader for the given range and starts with an empty current chunk.
 *
 * @param uri S3 URI of the object to read
 * @param from start position of the range
 * @param to end position of the range
 * @param factory factory for the input streams
 */
public S3ParallelStream(AmazonS3URI uri,
                        long from,
                        long to,
                        S3InputStreamFactory factory) {
  this.taskProducer = new ParallelPartsLoader(uri, from, to, factory);
  this.currentDataChunck = new byte[0];
}
/**
 * Stores the range to read and derives a thread name of the form
 * {@code [from : to](uri)}.
 *
 * @param uri S3 URI of the object to read
 * @param from start position of the part
 * @param to end position of the part
 * @param canceledFlag shared cancellation flag
 * @param factory factory for the input streams
 */
PartReader(AmazonS3URI uri,
           long from,
           long to,
           AtomicBoolean canceledFlag,
           S3InputStreamFactory factory) {
  this.canceledFlag = canceledFlag;
  this.uri = uri;
  this.from = from;
  this.to = to;
  this.threadName = new StringBuilder()
      .append("[").append(from).append(" : ").append(to).append("](")
      .append(uri.toString())
      .append(")")
      .toString();
  this.factory = factory;
}
/**
 * Returns the custom index URI from the configuration, when one is set.
 *
 * @return an Optional with the configured index URI, or empty when none is configured
 * @throws IllegalArgumentException if a custom index URI is configured but the client
 *         reports that the file does not exist
 */
Optional<AmazonS3URI> providedIndexURI() {
  return Configuration.getIndexCustomUrl()
      .map(url -> new AmazonS3URI(url.toString()))
      .map(indexUri -> {
        if (client.isFileExisting(indexUri)) {
          return indexUri;
        }
        throw new IllegalArgumentException("Provided index file doesn't exist.");
      });
}
/**
 * Looks for an index file next to the BAM file when no custom index URI is set. The last
 * {@code BAM_EXTENSION_LENGTH} characters of the BAM URI are replaced with
 * {@code BAM_BAI_EXTENSION} and with {@code BAI_EXTENSION}; both candidates are checked,
 * and the first one is preferred when it exists.
 *
 * @param bamURI the BAM file URI.
 * @return Optional of index URI.
 */
Optional<AmazonS3URI> nearbyIndexURI(AmazonS3URI bamURI) {
  final String bamLocation = bamURI.toString();
  final String stem = bamLocation.substring(0, bamLocation.length() - BAM_EXTENSION_LENGTH);

  final AmazonS3URI bamBaiCandidate = new AmazonS3URI(stem + BAM_BAI_EXTENSION);
  final boolean bamBaiExists = client.isFileExisting(bamBaiCandidate);

  final AmazonS3URI baiCandidate = new AmazonS3URI(stem + BAI_EXTENSION);
  final boolean baiExists = client.isFileExisting(baiCandidate);

  if (bamBaiExists) {
    return Optional.of(bamBaiCandidate);
  }
  return baiExists ? Optional.of(baiCandidate) : Optional.empty();
}
/**
 * A nearby index is found when the client reports "other.bam.bai" as existing, and is not
 * found when only "small.bam" is reported as existing.
 */
@Test
public void testMaybeNearbyIndex() {
  // Index file is present next to the BAM.
  mock = mockIsFileExisting("other.bam.bai");
  loader = new IndexLoader(mock);
  boolean foundForOther = loader.nearbyIndexURI(new AmazonS3URI(OTHER_BAM)).isPresent();
  assertTrue(foundForOther);

  // Only the BAM itself exists.
  mock = mockIsFileExisting("small.bam");
  loader = new IndexLoader(mock);
  boolean foundForSmall = loader.nearbyIndexURI(new AmazonS3URI(SMALL_BAM)).isPresent();
  assertFalse(foundForSmall);
}
/**
 * Creates an {@code S3Client} mock whose {@code isFileExisting} answers {@code true} only
 * for URIs whose key equals {@code key}.
 *
 * @param key the only object key reported as existing
 * @return the configured mock
 */
private S3Client mockIsFileExisting(String key) {
  S3Client clientMock = Mockito.mock(S3Client.class);
  Mockito
      .when(clientMock.isFileExisting(Mockito.any(AmazonS3URI.class)))
      .then(call -> {
        AmazonS3URI requested = (AmazonS3URI) call.getArguments()[0];
        return key.equals(requested.getKey());
      });
  return clientMock;
}
/**
 * Stubs {@code factory.loadFromTo(..)} so that every call answers with a stream whose
 * {@code read()} increments {@code exceptionsCount} and then throws an {@link IOException}.
 *
 * @param factory the factory mock to stub
 */
public static void mockIOException(S3InputStreamFactory factory) {
  PowerMockito.mockStatic(S3InputStreamFactory.class);
  PowerMockito
      .when(factory.loadFromTo(
          Mockito.any(AmazonS3URI.class),
          Mockito.anyLong(),
          Mockito.anyLong()))
      .then((call) -> new InputStream() {
        @Override
        public int read() throws IOException {
          exceptionsCount++;
          throw new IOException("Hello from Mockito!");
        }
      });
}
/**
 * Prepares a client mock that reports {@code FILE_SIZE} for any URI and a stream factory
 * mock stubbed by {@code S3DataLoaderMocker.mockAutoSeqLoadFromTo}.
 */
@Before
public void mockDataStream() {
  // Client: every file has the same, fixed size.
  client = Mockito.mock(S3Client.class);
  Mockito
      .when(client.getFileSize(Mockito.any(AmazonS3URI.class)))
      .thenReturn(FILE_SIZE);

  // Factory: streams are produced by the shared mocker.
  factory = Mockito.mock(S3InputStreamFactory.class);
  S3DataLoaderMocker.mockAutoSeqLoadFromTo(factory);
}
/**
 * Reads a stream of {@code 1042 * 1042} bytes to its end: byte {@code i} must equal the low
 * eight bits of {@code i}, and the read after the last byte must return -1.
 */
@Test
public void streamShouldReturnEOF() throws IOException {
  long fileSize = 1042 * 1042;
  PowerMockito
      .when(client.getFileSize(Mockito.any(AmazonS3URI.class)))
      .thenReturn(fileSize);

  S3SeekableStream fakeSeekable = new S3SeekableStream(S3DataLoaderMocker.FAKE_URI, client, factory);

  int position = 0;
  while (position < fileSize) {
    final int expectedByte = position & (0xff);
    assertEquals(expectedByte, fakeSeekable.read());
    position++;
  }
  assertEquals(-1, fakeSeekable.read());
}
/**
 * Stores the collaborators used by this filer.
 *
 * @param s3 S3 client
 * @param staging optional S3 staging location
 * @param workspace the workspace
 * @param templateEngine the template engine
 * @param params configuration parameters
 */
Filer(AmazonS3Client s3,
      Optional<AmazonS3URI> staging,
      Workspace workspace,
      TemplateEngine templateEngine,
      Config params) {
  this.s3 = s3;
  this.staging = staging;
  this.workspace = workspace;
  this.templateEngine = templateEngine;
  this.params = params;
}
RemoteFile prepareRemoteFile(String tag, String section, String path, FileReference reference, boolean template, String localDir) { String id = randomTag(s -> !ids.add(s)); String prefix = tag + "/" + section + "/" + path + "/" + id; if (localDir == null) { localDir = LOCAL_STAGING_DIR + "/" + prefix; } ImmutableRemoteFile.Builder builder = ImmutableRemoteFile.builder() .reference(reference) .localPath(localDir + "/" + reference.filename()); if (reference.local()) { // Local file? Then we need to upload it to S3. if (!staging.isPresent()) { throw new ConfigException("Please configure a S3 'staging' directory"); } String baseKey = staging.get().getKey(); String key = (baseKey != null ? baseKey : "") + prefix + "/" + reference.filename(); builder.s3Uri(new AmazonS3URI("s3://" + staging.get().getBucket() + "/" + key)); } else { builder.s3Uri(new AmazonS3URI(reference.reference().get())); } RemoteFile remoteFile = builder.build(); if (reference.local()) { files.add(StagingFile.of(template, remoteFile)); } return remoteFile; }
/**
 * Reads every object listed under the {@code /result/} prefix of the temporary S3 folder
 * and asserts that the expected record is among the lines read.
 */
protected void validateTdSparkQueryOutput() {
  AmazonS3URI resultUri = new AmazonS3URI(tmpS3FolderUri.toString() + "/result/");
  ListObjectsRequest listRequest = new ListObjectsRequest()
      .withBucketName(resultUri.getBucket())
      .withPrefix(resultUri.getKey());
  ObjectListing resultListing = s3.listObjects(listRequest);

  List<String> resultLines = resultListing.getObjectSummaries()
      .stream()
      .flatMap(summary -> {
        try (S3Object object = s3.getObject(summary.getBucketName(), summary.getKey())) {
          List<String> objectLines = CharStreams.readLines(new InputStreamReader(object.getObjectContent()));
          return objectLines.stream();
        } catch (IOException e) {
          throw Throwables.propagate(e);
        }
      })
      .collect(toList());

  assertThat(resultLines, Matchers.hasItem(",164.54.104.106,/item/games/4663,/category/electronics,404,Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0),121,GET,1412383598"));
}
@Test public void testS3URIValues() { // Verify that S3URI values will work: AmazonS3URI uri1 = new AmazonS3URI("s3://our.data.warehouse/prod/client_actions"); assertNotNull(uri1.getKey()); assertNotNull(uri1.getBucket()); assertEquals(uri1.toString(), "s3://our.data.warehouse/prod/client_actions"); assertEquals(uri1.getBucket(), "our.data.warehouse"); assertEquals(uri1.getKey(), "prod/client_actions"); assertTrue(uri1.getRegion() == null); // show info: log.info("Tested out URI1 : " + uri1.toString()); AmazonS3URI uri2 = new AmazonS3URI("s3://some.big.bucket/long/complex/path"); assertNotNull(uri2.getKey()); assertNotNull(uri2.getBucket()); assertEquals(uri2.toString(), "s3://some.big.bucket/long/complex/path"); assertEquals(uri2.getBucket(), "some.big.bucket"); assertEquals(uri2.getKey(), "long/complex/path"); assertTrue(uri2.getRegion() == null); // info: log.info("Tested out URI2 : " + uri2.toString()); AmazonS3URI uri3 = new AmazonS3URI("s3://presto.kinesis.config/unit-test/presto-kinesis"); assertNotNull(uri3.getKey()); assertNotNull(uri3.getBucket()); assertEquals(uri3.toString(), "s3://presto.kinesis.config/unit-test/presto-kinesis"); assertEquals(uri3.getBucket(), "presto.kinesis.config"); assertEquals(uri3.getKey(), "unit-test/presto-kinesis"); }
/**
 * Derives the region name from an S3 endpoint URL.
 *
 * @param endpointUrl the endpoint URL; must not be null
 * @return the name of {@code Regions.DEFAULT_REGION} when the host is
 *         {@code s3.amazonaws.com}, otherwise the region {@link AmazonS3URI} extracts from
 *         the URL
 * @throws RuntimeException if {@code endpointUrl} is not a syntactically valid URI
 */
private static String getRegion(String endpointUrl) {
  Assert.notNull(endpointUrl, "Endpoint Url must not be null");
  final URI endpoint;
  try {
    endpoint = new URI(endpointUrl);
  } catch (URISyntaxException e) {
    throw new RuntimeException("Malformed URL received for endpoint", e);
  }
  if ("s3.amazonaws.com".equals(endpoint.getHost())) {
    return Regions.DEFAULT_REGION.getName();
  }
  return new AmazonS3URI(endpointUrl).getRegion();
}
/** A URI with the {@code s3} scheme converts to the equal {@link AmazonS3URI}. */
@Test
public void toAmazonS3URISchemeIsS3() throws Exception {
  URI input = new URI("s3://a/b");
  AmazonS3URI actual = AmazonS3URIs.toAmazonS3URI(input);
  assertThat(actual, is(new AmazonS3URI("s3://a/b")));
}
/** An upper-case {@code S3} scheme is expected to convert to the lower-case {@code s3} URI. */
@Test
public void toAmazonS3URISchemeIsS3Uppercase() throws Exception {
  URI input = new URI("S3://a/b");
  AmazonS3URI actual = AmazonS3URIs.toAmazonS3URI(input);
  assertThat(actual, is(new AmazonS3URI("s3://a/b")));
}
/** A URI with the {@code s3a} scheme is expected to convert to the {@code s3} URI. */
@Test
public void toAmazonS3URISchemeIsS3a() throws Exception {
  URI input = new URI("s3a://a/b");
  AmazonS3URI actual = AmazonS3URIs.toAmazonS3URI(input);
  assertThat(actual, is(new AmazonS3URI("s3://a/b")));
}
/** An upper-case {@code S3A} scheme is expected to convert to the {@code s3} URI. */
@Test
public void toAmazonS3URISchemeIsS3aUppercase() throws Exception {
  URI input = new URI("S3A://a/b");
  AmazonS3URI actual = AmazonS3URIs.toAmazonS3URI(input);
  assertThat(actual, is(new AmazonS3URI("s3://a/b")));
}
/** A URI with the {@code s3n} scheme is expected to convert to the {@code s3} URI. */
@Test
public void toAmazonS3URISchemeIsS3n() throws Exception {
  URI input = new URI("s3n://a/b");
  AmazonS3URI actual = AmazonS3URIs.toAmazonS3URI(input);
  assertThat(actual, is(new AmazonS3URI("s3://a/b")));
}