@Override public CrawlerHttpClient gen(CrawlerHttpClientBuilder proxyFeedBackDecorateHttpClientBuilder) { SocketConfig socketConfig = SocketConfig.custom().setSoKeepAlive(true).setSoLinger(-1).setSoReuseAddress(false) .setSoTimeout(ProxyConstant.SOCKETSO_TIMEOUT).setTcpNoDelay(true).build(); return proxyFeedBackDecorateHttpClientBuilder .setDefaultSocketConfig(socketConfig) // .setSSLSocketFactory(sslConnectionSocketFactory) // dungproxy0.0.6之后的版本,默认忽略https证书检查 .setRedirectStrategy(new LaxRedirectStrategy()) //注意,这里使用ua生产算法自动产生ua,如果是mobile,可以使用 // com.virjar.vscrawler.core.net.useragent.UserAgentBuilder.randomAppUserAgent() .setUserAgent(UserAgentBuilder.randomUserAgent()) //对于爬虫来说,连接池没啥卵用,直接禁止掉(因为我们可能创建大量HttpClient,每个HttpClient一个连接池,会把系统socket资源撑爆) //测试开80个httpClient抓数据大概一个小时系统就会宕机 .setConnectionReuseStrategy(NoConnectionReuseStrategy.INSTANCE) .build(); }
public RequestListenerThread(int port, final String docroot) throws IOException { this.serversocket = new ServerSocket(port); this.params = new BasicHttpParams(); this.params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 1000).setIntParameter(CoreConnectionPNames.SOCKET_BUFFER_SIZE, 8 * 1024) .setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false).setBooleanParameter(CoreConnectionPNames.TCP_NODELAY, true) .setParameter(CoreProtocolPNames.ORIGIN_SERVER, "HttpComponents/1.1"); // Set up the HTTP protocol processor HttpProcessor httpproc = new BasicHttpProcessor(); // Set up request handlers HttpRequestHandlerRegistry reqistry = new HttpRequestHandlerRegistry(); reqistry.register("*", new HttpFileHandler(docroot)); // Set up the HTTP service this.httpService = new HttpService(httpproc, new NoConnectionReuseStrategy(), new DefaultHttpResponseFactory()); this.httpService.setParams(this.params); this.httpService.setHandlerResolver(reqistry); }
/**
 * Chooses the connection reuse strategy from the {@code http.keepAlive} system property.
 *
 * <p>Connections are reused only when the property is set to {@code "true"} (case-insensitive).
 * An unset property, or any other value, yields a strategy that never reuses connections.
 *
 * @return a {@code DefaultConnectionReuseStrategy} when keep-alive is enabled, otherwise a
 *         {@code NoConnectionReuseStrategy}
 */
@Override
protected ConnectionReuseStrategy createConnectionReuseStrategy() {
    // parseBoolean is true only for a non-null, case-insensitive "true".
    final boolean keepAliveRequested = Boolean.parseBoolean(System.getProperty("http.keepAlive"));
    if (!keepAliveRequested) {
        return new NoConnectionReuseStrategy();
    }
    return new DefaultConnectionReuseStrategy();
}
/** Creates a new fetching thread. * * @param frontier a reference to the {@link Frontier}. * @param index the index of this thread (only for logging purposes). */ public FetchingThread(final Frontier frontier, final int index) throws NoSuchAlgorithmException, IllegalArgumentException, IOException { setName(this.getClass().getSimpleName() + '-' + index); setPriority(Thread.MIN_PRIORITY); // Low priority; there will be thousands of this guys around. this.frontier = frontier; final BasicHttpClientConnectionManager connManager = new BasicHttpClientConnectionManagerWithAlternateDNS(frontier.rc.dnsResolver); connManager.closeIdleConnections(0, TimeUnit.MILLISECONDS); connManager.setConnectionConfig(ConnectionConfig.custom().setBufferSize(8 * 1024).build()); // TODO: make this configurable cookieStore = new BasicCookieStore(); BasicHeader[] headers = { new BasicHeader("From", frontier.rc.userAgentFrom), new BasicHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.95,text/*;q=0.9,*/*;q=0.8") }; httpClient = HttpClients.custom() .setSSLContext(frontier.rc.acceptAllCertificates ? TRUST_ALL_CERTIFICATES_SSL_CONTEXT : TRUST_SELF_SIGNED_SSL_CONTEXT) .setConnectionManager(connManager) .setConnectionReuseStrategy(frontier.rc.keepAliveTime == 0 ? NoConnectionReuseStrategy.INSTANCE : DefaultConnectionReuseStrategy.INSTANCE) .setUserAgent(frontier.rc.userAgent) .setDefaultCookieStore(cookieStore) .setDefaultHeaders(ObjectArrayList.wrap(headers)) .build(); fetchData = new FetchData(frontier.rc); }
/**
 * Builds an HTTP client that never reuses connections.
 *
 * @param proxy       proxy to route requests through, or {@code null} for no proxy
 * @param redirects   whether redirects are followed (at most 5 when enabled)
 * @param cookieStore the default cookie store installed on the client
 * @return a newly built client
 */
public static CloseableHttpClient getHttpClient(final HttpHost proxy, final boolean redirects, final CookieStore cookieStore) {
    // Request-level settings: redirect policy plus the optional proxy.
    final Builder requestSettings = RequestConfig.custom();
    requestSettings.setRedirectsEnabled(redirects);
    requestSettings.setMaxRedirects(5);
    if (proxy != null) {
        requestSettings.setProxy(proxy);
    }
    final RequestConfig defaultRequestConfig = requestSettings.build();

    return HttpClients.custom()
            .setDefaultRequestConfig(defaultRequestConfig)
            .setDefaultCookieStore(cookieStore)
            .setConnectionReuseStrategy(NoConnectionReuseStrategy.INSTANCE)
            .build();
}
/**
 * Creates an HTTP client that honours system properties, uses the JVM's default
 * {@link SSLContext}, and never reuses connections.
 *
 * @return a newly built client
 * @throws Exception if the default SSL context cannot be obtained
 */
private CloseableHttpClient createCloseableHttpClient() throws Exception {
    return HttpClientBuilder.create()
            .useSystemProperties()
            .setConnectionReuseStrategy(NoConnectionReuseStrategy.INSTANCE)
            .setSSLContext(SSLContext.getDefault())
            .build();
}
/**
 * Creates an HTTP client.
 *
 * <p>When the connection manager provider supplies a manager, the client is built on top of it.
 * When it supplies none, the client is built without an explicit manager and with connection
 * reuse disabled.
 *
 * @return a newly built client
 */
@Override
public HttpClient create() {
    if (this.httpConnectionManagerProvider.getManager() != null) {
        return HttpClients.custom()
                .setConnectionManager(this.httpConnectionManagerProvider.getManager())
                .build();
    }
    // No shared manager available: fall back to a client that never reuses connections.
    return HttpClients.custom()
            .setConnectionReuseStrategy(NoConnectionReuseStrategy.INSTANCE)
            .build();
}
public ListenerThread(final ApiServer requestHandler, final int port) { try { _serverSocket = new ServerSocket(port); } catch (final IOException ioex) { s_logger.error("error initializing api server", ioex); return; } _params = new BasicHttpParams(); _params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 30000) .setIntParameter(CoreConnectionPNames.SOCKET_BUFFER_SIZE, 8 * 1024) .setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false) .setBooleanParameter(CoreConnectionPNames.TCP_NODELAY, true) .setParameter(CoreProtocolPNames.ORIGIN_SERVER, "HttpComponents/1.1"); // Set up the HTTP protocol processor final BasicHttpProcessor httpproc = new BasicHttpProcessor(); httpproc.addInterceptor(new ResponseDate()); httpproc.addInterceptor(new ResponseServer()); httpproc.addInterceptor(new ResponseContent()); httpproc.addInterceptor(new ResponseConnControl()); // Set up request handlers final HttpRequestHandlerRegistry reqistry = new HttpRequestHandlerRegistry(); reqistry.register("*", requestHandler); // Set up the HTTP service _httpService = new HttpService(httpproc, new NoConnectionReuseStrategy(), new DefaultHttpResponseFactory()); _httpService.setParams(_params); _httpService.setHandlerResolver(reqistry); }
/**
 * Chooses the connection reuse strategy from the {@code http.keepAlive} system property,
 * which is treated as {@code "true"} when unset.
 *
 * @return a {@code DefaultConnectionReuseStrategy} when the property is unset or equals
 *         {@code "true"} (case-insensitive), otherwise a {@code NoConnectionReuseStrategy}
 */
@Override
protected ConnectionReuseStrategy createConnectionReuseStrategy() {
    final String keepAliveSetting = System.getProperty("http.keepAlive", "true");
    // parseBoolean is true only for a case-insensitive "true".
    if (!Boolean.parseBoolean(keepAliveSetting)) {
        return new NoConnectionReuseStrategy();
    }
    return new DefaultConnectionReuseStrategy();
}
/**
 * Get a default HttpClient based on the HttpConfiguration object. If required the defaults can
 * be altered to meet the requirements of the SDK user. The default client does not use connection
 * pooling and does not reuse connections. Timeouts for connection and socket are taken from the
 * {@link HttpConfiguration} object.
 *
 * @param httpConfiguration configuration whose timeout is applied as both the connect and the
 *                          socket timeout
 * @return a newly built client backed by a basic (non-pooling) connection manager
 */
public static CloseableHttpClient getDefaultClient(HttpConfiguration httpConfiguration) {
    // The same timeout value is used for connecting and for socket reads.
    RequestConfig timeouts = RequestConfig.custom()
            .setConnectTimeout(httpConfiguration.getTimeout())
            .setSocketTimeout(httpConfiguration.getTimeout())
            .build();
    HttpClientConnectionManager basicManager = new BasicHttpClientConnectionManager();
    ConnectionReuseStrategy noReuse = new NoConnectionReuseStrategy();

    logger.debug("Creating HttpClient with simple no pooling/no connection reuse default settings.");

    return HttpClients.custom()
            .setDefaultRequestConfig(timeouts)
            .setConnectionManager(basicManager)
            .setConnectionReuseStrategy(noReuse)
            .build();
}
/** * @param listener Log listener * @param prompt Prompt for proxy credentials * @return Builder for HTTP client */ public HttpClientBuilder build(final TranscriptListener listener, final LoginCallback prompt) { final HttpClientBuilder configuration = HttpClients.custom(); // Use HTTP Connect proxy implementation provided here instead of // relying on internal proxy support in socket factory final Proxy proxy = proxyFinder.find(host); switch(proxy.getType()) { case HTTP: case HTTPS: final HttpHost h = new HttpHost(proxy.getHostname(), proxy.getPort(), StringUtils.lowerCase(proxy.getType().name())); if(log.isInfoEnabled()) { log.info(String.format("Setup proxy %s", h)); } configuration.setProxy(h); configuration.setProxyAuthenticationStrategy(new CallbackProxyAuthenticationStrategy(ProxyCredentialsStoreFactory.get(), host, prompt)); break; } configuration.setUserAgent(new PreferencesUseragentProvider().get()); final int timeout = preferences.getInteger("connection.timeout.seconds") * 1000; configuration.setDefaultSocketConfig(SocketConfig.custom() .setTcpNoDelay(true) .setSoTimeout(timeout) .build()); configuration.setDefaultRequestConfig(this.createRequestConfig(timeout)); final String encoding; if(null == host.getEncoding()) { encoding = preferences.getProperty("browser.charset.encoding"); } else { encoding = host.getEncoding(); } configuration.setDefaultConnectionConfig(ConnectionConfig.custom() .setBufferSize(preferences.getInteger("http.socket.buffer")) .setCharset(Charset.forName(encoding)) .build()); if(preferences.getBoolean("http.connections.reuse")) { configuration.setConnectionReuseStrategy(new DefaultClientConnectionReuseStrategy()); } else { configuration.setConnectionReuseStrategy(new NoConnectionReuseStrategy()); } configuration.setRetryHandler(new ExtendedHttpRequestRetryHandler(preferences.getInteger("http.connections.retry"))); configuration.setServiceUnavailableRetryStrategy(new DisabledServiceUnavailableRetryStrategy()); 
if(!preferences.getBoolean("http.compression.enable")) { configuration.disableContentCompression(); } configuration.setRequestExecutor(new LoggingHttpRequestExecutor(listener)); // Always register HTTP for possible use with proxy. Contains a number of protocol properties such as the // default port and the socket factory to be used to create the java.net.Socket instances for the given protocol configuration.setConnectionManager(this.createConnectionManager(this.createRegistry())); configuration.setDefaultAuthSchemeRegistry(RegistryBuilder.<AuthSchemeProvider>create() .register(AuthSchemes.BASIC, new BasicSchemeFactory( Charset.forName(preferences.getProperty("http.credentials.charset")))) .register(AuthSchemes.DIGEST, new DigestSchemeFactory( Charset.forName(preferences.getProperty("http.credentials.charset")))) .register(AuthSchemes.NTLM, new NTLMSchemeFactory()) .register(AuthSchemes.SPNEGO, new SPNegoSchemeFactory()) .register(AuthSchemes.KERBEROS, new KerberosSchemeFactory()).build()); return configuration; }
protected FedX(Config config, Cache cache, Statistics statistics, EndpointListProvider endpointListProvider, SummaryProvider summaryProvider) { this.config = config; this.cache = cache; this.statistics = statistics; this.endpointListProvider = endpointListProvider; this.summaryProvider = summaryProvider; // initialize httpclient parameters HttpClientBuilder httpClientBuilder = HttpClientBuilders.getSSLTrustAllHttpClientBuilder(); httpClientBuilder.setMaxConnTotal(config.getMaxHttpConnectionCount()); httpClientBuilder.setMaxConnPerRoute(config.getMaxHttpConnectionCountPerRoute()); //httpClientBuilder.evictExpiredConnections(); httpClientBuilder.setConnectionReuseStrategy(new NoConnectionReuseStrategy()); //httpClientBuilder.setConnectionTimeToLive(1000, TimeUnit.MILLISECONDS); //httpClientBuilder.disableAutomaticRetries(); // httpClientBuilder.setKeepAliveStrategy(new ConnectionKeepAliveStrategy(){ // // @Override // public long getKeepAliveDuration(HttpResponse response, HttpContext context) { // return 0; // }}); httpClient = httpClientBuilder.build(); synchronized (log) { if (monitoring == null) { monitoring = MonitoringFactory.createMonitoring(config); } } executor = Executors.newCachedThreadPool(); scheduler = new ControlledWorkerScheduler(config.getWorkerThreads(), "Evaluation Scheduler"); if (log.isDebugEnabled()) { log.debug("Scheduler for async operations initialized with " + config.getWorkerThreads() + " worker threads."); } // initialize prefix declarations, if any String prefixFile = config.getPrefixDeclarations(); if (prefixFile != null) { prefixDeclarations = new Properties(); try { prefixDeclarations.load(new FileInputStream(new File(prefixFile))); } catch (IOException e) { throw new FedXRuntimeException("Error loading prefix properties: " + e.getMessage()); } } open = true; }
/** * Build an HttpClient * * @param customiser * * @return */ public CloseableHttpClient createHttpClient(final Consumer<HttpClientBuilder> customiser) { final HttpClientBuilder builder = HttpClientBuilder.create(); // By default set long call timeouts { RequestConfig.Builder requestBuilder = RequestConfig.custom(); requestBuilder.setConnectTimeout((int) connectionTimeout.getMilliseconds()) .setSocketTimeout((int) socketTimeout.getMilliseconds()); builder.setDefaultRequestConfig(requestBuilder.build()); } // Set the default keepalive setting if (noKeepalive) builder.setConnectionReuseStrategy(new NoConnectionReuseStrategy()); // By default share the common connection provider builder.setConnectionManager(connectionManager); // By default use the JRE default route planner for proxies builder.setRoutePlanner(new SystemDefaultRoutePlanner(ProxySelector.getDefault())); // If a correlation id is set locally then make sure we pass it along to the remote service // N.B. we use the value from the MDC because the correlation id could be for a internal task builder.addInterceptorFirst(new HttpRequestInterceptor() { @Override public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException { final String traceId = MDC.get(LoggingMDCConstants.TRACE_ID); if (traceId != null) request.addHeader("X-Correlation-ID", traceId); } }); // Allow customisation if (customiser != null) customiser.accept(builder); return builder.build(); }