我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用asyncio.Semaphore()。
async def run_scraping(url, timestamps, scrape_function, concurrency, user_agent):
    """
    Run the scraping function asynchronously on the given archives.

    The concurrency parameter limits the number of concurrent connections to
    the web archive.

    This is a coroutine function: the body uses ``async with`` and ``await``,
    which are only valid inside ``async def``.

    Returns a dict mapping each timestamp to its scraping result; pairs whose
    result is ``None`` are left out.
    """
    # Use a semaphore to limit the number of concurrent connections to the
    # internet archive.
    sem = asyncio.Semaphore(concurrency)

    # Use one session to benefit from connection pooling.
    async with aiohttp.ClientSession(headers={'User-Agent': user_agent}) as session:
        # Create scraping coroutines for each archive.
        coroutines = [scrape_archive(session, url, timestamp, scrape_function, sem)
                      for timestamp in timestamps]

        # Wait for coroutines to finish and gather the results.
        results = await asyncio.gather(*coroutines)

        # Compile each valid scraping result in a dictionary.
        return {timestamp: result for timestamp, result in results if result is not None}
def test_repr(self):
    """Check repr() of a semaphore while unlocked, locked, and with waiters."""
    sem = asyncio.Semaphore(loop=self.loop)
    # Freshly created with the default value, so the repr shows value 1.
    self.assertTrue(repr(sem).endswith('[unlocked,value:1]>'))
    self.assertTrue(RGX_REPR.match(repr(sem)))

    # After one acquire the repr switches to the locked form, with no
    # mention of waiters yet.
    self.loop.run_until_complete(sem.acquire())
    self.assertTrue(repr(sem).endswith('[locked]>'))
    self.assertTrue('waiters' not in repr(sem))
    self.assertTrue(RGX_REPR.match(repr(sem)))

    # Waiters are simulated by appending mocks to the private waiter list.
    sem._waiters.append(mock.Mock())
    self.assertTrue('waiters:1' in repr(sem))
    self.assertTrue(RGX_REPR.match(repr(sem)))

    sem._waiters.append(mock.Mock())
    self.assertTrue('waiters:2' in repr(sem))
    self.assertTrue(RGX_REPR.match(repr(sem)))
def test_semaphore(self):
    """Acquire a default semaphore via ``yield from sem`` and release it again."""
    sem = asyncio.Semaphore(loop=self.loop)
    self.assertEqual(1, sem._value)

    @asyncio.coroutine
    def acquire_lock():
        # Generator-based coroutine: acquires by delegating to the semaphore.
        return (yield from sem)

    res = self.loop.run_until_complete(acquire_lock())

    # The acquire result is truthy and consumed the only available slot.
    self.assertTrue(res)
    self.assertTrue(sem.locked())
    self.assertEqual(0, sem._value)

    # Releasing restores the counter to its initial value.
    sem.release()
    self.assertFalse(sem.locked())
    self.assertEqual(1, sem._value)
def get_picture_urls(dates, verbose=False):
    """Fetch the picture URL for every date and return the ones that succeed.

    Generator-based coroutine (drive it with ``yield from``). One
    ``get_picture_url`` job is created per date, all sharing a semaphore sized
    by ``MAX_CONCURRENT_REQUESTS``; results are consumed in completion order
    with an overall ``GLOBAL_TIMEOUT``.

    Each successful URL is printed (with a running counter and only the last
    path component when ``verbose``) and appended to the returned list.
    Dates that raise ``NoPictureForDate`` or ``aiohttp.ClientResponseError``
    are skipped.
    """
    limiter = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
    jobs = [get_picture_url(date, limiter) for date in dates]

    found = []
    fetched = 0
    # Handle results as soon as each job finishes.
    for finished in asyncio.as_completed(jobs, timeout=GLOBAL_TIMEOUT):
        try:
            url = yield from finished
        except NoPictureForDate as exc:
            # Only reported in verbose mode.
            if verbose:
                print('*** {!r} ***'.format(exc))
            continue
        except aiohttp.ClientResponseError as exc:
            # Always reported.
            print('****** {!r} ******'.format(exc))
            continue

        fetched += 1
        if not verbose:
            print(url)
        else:
            print(format(fetched, '3d'), end=' ')
            print(url.rsplit('/', 1)[-1])
        found.append(url)

    return found
def __init__(self, manager):
    """Set up the overseer's bookkeeping state for the given manager."""
    self.log = get_logger('overseer')

    # Collaborators and collections.
    self.manager = manager
    self.workers = []
    self.things_count = deque(maxlen=9)

    # Status flags.
    self.paused = False
    self.running = True
    self.all_seen = False

    # Counters, all starting from zero.
    self.coroutines_count = 0
    self.skipped = 0
    self.visits = 0
    self.redundant = 0
    self.idle_seconds = 0

    # Semaphore sized by the configured coroutine limit.
    self.coroutine_semaphore = Semaphore(conf.COROUTINES_LIMIT, loop=LOOP)

    self.log.info('Overseer initialized')
    self.pokemon_found = ''
def init(self):
    """Read settings from ``self.config`` and prepare the session.

    Populates the private settings (prefix, template, semaphore, allowed
    hosts, format, return-status flag), builds ``self.session_params`` from
    the optional ``headers``, ``conn_timeout`` and ``read_timeout`` entries,
    resets the session and registers ``self.stop`` in the context's
    ``on_stop`` list.
    """
    cfg = self.config

    prefix = cfg.get('prefix')
    # A configured prefix is wrapped in URL; a missing/empty one is kept as is.
    self._prefix = URL(prefix) if prefix else prefix
    self._template = cfg.get('template')

    self._semaphore = asyncio.Semaphore(
        cfg.get('semaphore', 20), loop=self.loop)
    self._allow_hosts = cfg.get('allow_hosts')
    self._format = cfg.get('format', 'json')
    self._return_status = cfg.get('return_status', False)

    params = {}
    headers = cfg.get('headers')
    if headers:
        params['headers'] = dict(headers)
    # Timeouts are forwarded only when explicitly configured.
    params.update(
        (name, cfg[name])
        for name in ('conn_timeout', 'read_timeout')
        if name in cfg)
    self.session_params = params

    self.reset_session()
    self.context.on_stop.append(self.stop)
def run_commands_chain_async(self, chains: list, block=False, state_json_dir=None,
                             delegate_extra_params=None):
    """Dispatch ``chains`` to every target host, bounded by a semaphore.

    This is a generator-based coroutine (the body contains ``yield from``), so
    nothing executes until it is driven by the event loop.

    Args:
        chains: Command chains handed to ``self.dispatch_chain`` for each host.
        block: When true, wait for all dispatches and return their results;
            otherwise only schedule them.
        state_json_dir: If set, a ``JsonDelegate`` writing to this directory
            replaces ``self.async_delegate``.
        delegate_extra_params: Extra keyword arguments for ``JsonDelegate``;
            ``None`` means no extras.

    Returns:
        In blocking mode, the list of task results in target order (an empty
        list when there are no targets). In non-blocking mode, ``None``.

    Raises:
        AssertionError: if no ``state_json_dir`` is given and no async
            delegate has been set.
    """
    sem = asyncio.Semaphore(self.__parallelism)

    if state_json_dir:
        log.debug('Using default JsonDelegate method, state_json_dir {}'.format(state_json_dir))
        self.async_delegate = JsonDelegate(
            state_json_dir, len(self.__targets), **(delegate_extra_params or {}))
    else:
        assert self.async_delegate, 'async delegate must be set'

    if block:
        log.debug('Waiting for run_command_chain_async to execute')
        # ``asyncio.async`` cannot be written any more (``async`` is a
        # reserved word); ``ensure_future`` is its direct replacement.
        tasks = [asyncio.ensure_future(self.dispatch_chain(host, chains, sem))
                 for host in self.__targets]
        # asyncio.wait() rejects an empty collection, so skip it then.
        if tasks:
            yield from asyncio.wait(tasks)
        log.debug('run_command_chain_async executed')
        return [task.result() for task in tasks]
    else:
        log.debug('Started run_command_chain_async in non-blocking mode')
        for host in self.__targets:
            asyncio.ensure_future(self.dispatch_chain(host, chains, sem))
def __init__(self, engine, data_callback, ordered=False, positions=None,
             retry_seconds=30, concurrency=64):
    """Store the configuration and create the internal scheduling state.

    ``concurrency`` sizes the task semaphore; every other argument is kept
    verbatim on the instance.
    """
    # Caller-supplied configuration.
    self.engine, self.data_callback = engine, data_callback
    self.ordered, self.positions = ordered, positions
    self.retry_seconds, self.concurrency = retry_seconds, concurrency

    # Concurrency control and progress tracking.
    self._task_semaphore = asyncio.Semaphore(concurrency)
    self._next_position = 0
    self._failed = deque()

    self._ordered_waiters = []
    # FIXME: the purpose of this mapping is not documented.
    self._ordered_waiters_dc = {}

    self._task_cnt = 0
    self._tasks_done = asyncio.Event()
    self._abort = False
def test_acquire_cancel_before_awoken(self):
    """Cancel two of four waiters after a release; exactly one of the other two must finish."""
    sem = asyncio.Semaphore(value=0, loop=self.loop)

    # Four tasks all waiting on a semaphore that starts at zero.
    t1 = asyncio.Task(sem.acquire(), loop=self.loop)
    t2 = asyncio.Task(sem.acquire(), loop=self.loop)
    t3 = asyncio.Task(sem.acquire(), loop=self.loop)
    t4 = asyncio.Task(sem.acquire(), loop=self.loop)

    # Let the loop run so the tasks get started before anything is released.
    test_utils.run_briefly(self.loop)

    # Release a single slot, then cancel the first two tasks before the loop
    # gets a chance to run again.
    sem.release()
    t1.cancel()
    t2.cancel()

    test_utils.run_briefly(self.loop)
    # The one released slot must have gone to exactly one surviving task.
    num_done = sum(t.done() for t in [t3, t4])
    self.assertEqual(num_done, 1)

    # Clean up whichever task is still pending.
    t3.cancel()
    t4.cancel()
    test_utils.run_briefly(self.loop)
def __init__(self, env):
    """Initialise daemon state from ``env`` (coin, daemon URLs, RPC limits)."""
    super().__init__()
    self.coin = env.coin
    self.set_urls(env.coin.daemon_urls(env.daemon_url))

    self._height = None
    self._mempool_hashes = set()
    self.mempool_refresh_event = asyncio.Event()

    # Cap on concurrent RPC calls. See DEFAULT_HTTP_WORKQUEUE in bitcoind,
    # which is typically 16.
    self.workqueue_semaphore = asyncio.Semaphore(value=10)

    self.down = False
    self.last_error_time = 0
    self.req_id = 0

    # Pick the exception classes by aiohttp major version. The slot that is
    # filled with asyncio.TimeoutError is effectively a placeholder.
    is_aiohttp_1 = aiohttp.__version__.startswith('1.')
    if not is_aiohttp_1:
        self.ClientHttpProcessingError = asyncio.TimeoutError
        self.ClientPayloadError = aiohttp.ClientPayloadError
    else:
        self.ClientHttpProcessingError = aiohttp.ClientHttpProcessingError
        self.ClientPayloadError = asyncio.TimeoutError

    # Caches results for _is_rpc_available().
    self._available_rpcs = {}
def __init__(self, bot):
    """Register the star exceptions and set up locks, semaphore and collections."""
    super().__init__(bot)

    star_errors = [StarError, StarAddError, StarRemoveError]
    self.bot.simple_exc.extend(star_errors)

    # Prevent race conditions: locks are created on first lookup of a key.
    self._locks = collections.defaultdict(asyncio.Lock)

    # Janitor: the semaphore keeps things up and running by only allowing
    # one janitor task at a time. A janitor task cleans stuff out of mongo.
    self.janitor_semaphore = asyncio.Semaphore(1)

    # Collections.
    database = self.config.jose_db
    self.starboard_coll = database['starboard']
    self.starconfig_coll = database['starconfig']
def wait_with_progress(urlList, concurency = 30, timeout = 120, rawResults = False, cloudflare = False, headers = None):
    """Fetch every URL in ``urlList`` with a progress bar and merge the results.

    Generator-based coroutine (drive it with ``yield from``).

    Args:
        urlList: URLs handed one by one to ``parseUrl``.
        concurency: Size of the semaphore shared by all ``parseUrl`` calls.
        timeout: Forwarded to ``parseUrl``.
        rawResults: Forwarded to ``parseUrl``.
        cloudflare: Use ``CloudflareScraper`` instead of
            ``aiohttp.ClientSession`` as the session class.
        headers: Optional extra request headers. The mapping is copied, so
            the caller's object is never modified. The ``User-Agent`` entry
            is always replaced by the built-in bot user agent.

    Returns:
        A dict built by ``update()``-ing each ``parseUrl`` result, in
        completion order.
    """
    sem = asyncio.Semaphore(concurency)
    # Client session worker.
    # Work on a private copy: update() below must not leak the User-Agent
    # into a dict owned by the caller.
    headers = dict(headers or {})
    headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36 vulners.com/bot'})
    if cloudflare:
        sessionClient = CloudflareScraper
    else:
        sessionClient = aiohttp.ClientSession
    urlToResultDict = {}
    # NOTE(review): a synchronous ``with`` on the session presumably targets
    # an older aiohttp API; confirm against the pinned aiohttp version.
    with sessionClient(connector=aiohttp.TCPConnector(verify_ssl=False), headers=headers) as session:
        coros = [parseUrl(url = d, semaphore = sem, session = session, timeout = timeout, rawResults=rawResults) for d in urlList]
        for f in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
            result = yield from f
            urlToResultDict.update(result)
    return urlToResultDict
def __init__(self, server):
    """Mirror the server's collections and create empty in-memory caches."""
    self.server = server

    # Copy each collection handle from the server under the same name.
    for coll_name in ('role_coll', 'channel_coll', 'guild_coll',
                      'invite_coll', 'message_coll', 'member_coll'):
        setattr(self, coll_name, getattr(server, coll_name))

    self.raw_members = defaultdict(dict)

    # Independent empty lists, one per attribute.
    for cache_name in ('roles', 'channels', 'guilds', 'invites', 'messages'):
        setattr(self, cache_name, [])

    # Semaphores
    self.message_semaphore = asyncio.Semaphore(3)
async def _worker(self, idx: int, video_id: str, url: str) -> Optional[bytes]:
    """Download one picture and return its bytes, or ``None`` on failure.

    Declared ``async`` because the body uses ``async with`` and ``await``.

    All calls on the same instance share one semaphore sized by the
    instance's parallel limit. Creating a fresh semaphore on every call would
    never block anything, so the limit would have no effect.

    Args:
        idx: Zero-based position, used only for the progress log line.
        video_id: Key into ``self.glossary``; also tracked in ``self.undone``.
        url: Address fetched through ``self.session``.

    Returns:
        The response body when the status is 200; ``None`` on any other
        status or on timeout (the id is then appended to ``self.undone``).
    """
    # Lazily create the shared limiter. There is no await between the lookup
    # and the assignment, so concurrent workers cannot create two of them.
    limiter = getattr(self, "_worker_semaphore", None)
    if limiter is None:
        limiter = asyncio.Semaphore(self.__parallel_limit)
        self._worker_semaphore = limiter

    async with limiter:
        self.logger.info(Msg.nd_download_pict, idx + 1, len(self.glossary),
                         video_id, self.glossary[video_id][KeyGTI.TITLE])
        try:
            async with self.session.get(url, timeout=10) as response:
                self.logger.debug("Video ID: %s, Status Code: %s", video_id, response.status)
                if response.status == 200:
                    # Success: drop the id from the pending list if it was
                    # recorded there by an earlier failed attempt.
                    if video_id in self.undone:
                        self.undone.remove(video_id)
                    return await response.content.read()
                else:
                    self.undone.append(video_id)
                    return None
        except asyncio.TimeoutError:
            self.logger.warning("%s ?????", video_id)
            self.undone.append(video_id)
            return None
def get_operations(target_uris: dict, cap_connections: bool) -> list:
    """Build one fetch operation per target URI.

    Each operation is ``fetch_uri(uri, last_load)``, where ``last_load`` is
    the element at index 2 of the URI's entry in ``target_uris``. When
    ``cap_connections`` is true, every operation is additionally wrapped with
    a single shared semaphore sized by ``MAX_CONNECTIONS``.
    """
    # The shared cap is created up front, and only when capping is requested.
    connection_cap = asyncio.Semaphore(MAX_CONNECTIONS) if cap_connections else None

    operations = []
    for uri, entry in target_uris.items():
        operation = fetch_uri(uri, entry[2])
        if cap_connections:
            operation = wrap_semaphore(operation, connection_cap)
        operations.append(operation)
    return operations
def run():
    """Open 1000 client connections to 127.0.0.1:8888 and print the elapsed time.

    Each connection gets its own generated ISO 8583 message and shares one
    semaphore of size 5 with the other protocol instances. Connections are
    established one after another, as before.

    Every connection is wrapped in a task before it is run. The final
    ``asyncio.wait`` therefore receives finished tasks rather than coroutine
    objects that have already been run, which cannot be awaited again and are
    not accepted by ``asyncio.wait`` on current Python versions.
    """
    loop = asyncio.get_event_loop()
    # The semaphore is created without the ``loop`` keyword, which no longer
    # exists on current Python; ``loop`` is the current event loop anyway.
    semaphore = asyncio.Semaphore(5)
    connections = []
    try:
        start_time = timeit.default_timer()
        for message_number in range(1000):
            message = generate_iso8583_message(message_number + 1)
            # Bind ``message`` as a default so the factory never depends on
            # the loop variable's later values.
            connecting = asyncio.ensure_future(
                loop.create_connection(
                    lambda message=message: Iso8583ClientProtocol(message, loop, semaphore),
                    '127.0.0.1', 8888),
                loop=loop)
            connections.append(connecting)
            loop.run_until_complete(connecting)
        loop.run_until_complete(asyncio.wait(connections))
        stop_time = timeit.default_timer()
        print("Time:{}".format(stop_time - start_time))
    finally:
        # Close the loop even when a connection attempt fails.
        loop.close()
def __init__(self, domain, options, queue = None, loop = None, dict_file = None):
    """Prepare the scanner for ``domain``.

    Falls back to the current event loop, a new ``asyncio.Queue`` and the
    ``'subnames.txt'`` dictionary file when the corresponding argument is
    falsy. ``options.rate`` sizes the semaphore. Finishes by calling
    ``self._load_sub_names()``.
    """
    if not loop:
        loop = asyncio.get_event_loop()
    self.loop = loop
    assert self.loop is not None

    self.sem = asyncio.Semaphore(options.rate)
    self.domain = domain
    self.tasks = []

    if not queue:
        queue = asyncio.Queue()
    self.queue = queue

    self.result = []
    self.dict_file = dict_file if dict_file else 'subnames.txt'
    self.resolver = DNSResolver(loop = self.loop)
    self._load_sub_names()
def __init__(self, root, *args, **kwargs):
    """Initialise the base class, then create the synchronisation primitives."""
    super().__init__(root, *args, **kwargs)

    # Only the event is tied explicitly to the root's loop.
    root_loop = root.loop
    self._ready = asyncio.Event(loop=root_loop)
    self._get_cluster_lock = asyncio.Lock()

    # Semaphore sized by the "max_vms" configuration entry.
    vm_limit = self.config["max_vms"]
    self._vms_semaphore = asyncio.Semaphore(vm_limit)
def test_ctor_loop(self):
    """A loop passed to the constructor is stored on the semaphore."""
    # First a mock loop, then the real test loop.
    for given_loop in (mock.Mock(), self.loop):
        sem = asyncio.Semaphore(loop=given_loop)
        self.assertIs(sem._loop, given_loop)
def test_ctor_noloop(self):
    """Without an explicit loop the semaphore uses the loop that was set as current."""
    current = self.loop
    asyncio.set_event_loop(current)
    self.assertIs(asyncio.Semaphore()._loop, current)
def test_initial_value_zero(self):
    """A semaphore created with value 0 reports itself as locked."""
    is_locked = asyncio.Semaphore(0, loop=self.loop).locked()
    self.assertTrue(is_locked)
def test_acquire_cancel(self):
    """Cancelling a pending acquire raises CancelledError and leaves no waiter behind."""
    sem = asyncio.Semaphore(loop=self.loop)
    # Take the only slot so the next acquire has to wait.
    self.loop.run_until_complete(sem.acquire())

    acquire = asyncio.Task(sem.acquire(), loop=self.loop)
    # Schedule the cancellation before the loop is run below.
    self.loop.call_soon(acquire.cancel)
    self.assertRaises(
        asyncio.CancelledError,
        self.loop.run_until_complete, acquire)
    # The private waiter collection must be empty again.
    self.assertFalse(sem._waiters)
def test_release_no_waiters(self):
    """Releasing with nobody waiting simply unlocks the semaphore."""
    loop = self.loop
    sem = asyncio.Semaphore(loop=loop)

    # Take the only slot.
    loop.run_until_complete(sem.acquire())
    locked_after_acquire = sem.locked()
    self.assertTrue(locked_after_acquire)

    sem.release()
    locked_after_release = sem.locked()
    self.assertFalse(locked_after_release)
def test_context_manager(self):
    """Nested ``with`` blocks on the acquire result take and give back both slots."""
    sem = asyncio.Semaphore(2, loop=self.loop)

    @asyncio.coroutine
    def acquire_lock():
        # Generator-based coroutine: the value of ``yield from sem`` is used
        # as the context manager in the ``with`` statements below.
        return (yield from sem)

    with self.loop.run_until_complete(acquire_lock()):
        # One of the two slots is taken, so the semaphore is not locked yet.
        self.assertFalse(sem.locked())
        self.assertEqual(1, sem._value)

        with self.loop.run_until_complete(acquire_lock()):
            # Both slots are taken now.
            self.assertTrue(sem.locked())

    # Leaving both blocks restores the initial value.
    self.assertEqual(2, sem._value)
def test_context_manager_no_yield(self):
    """Using the semaphore directly in ``with`` must raise RuntimeError and not consume a slot."""
    sem = asyncio.Semaphore(2, loop=self.loop)

    try:
        with sem:
            # Reaching the body means no error was raised on entry.
            self.fail('RuntimeError is not raised in with expression')
    except RuntimeError as err:
        self.assertEqual(
            str(err),
            '"yield from" should be used as context manager expression')

    # The failed attempt must leave the counter untouched.
    self.assertEqual(2, sem._value)
def fetch_all(r):
    """Schedule ``r`` bounded fetches of the local root URL and wait for all of them.

    Generator-based coroutine. The shared semaphore has 10 slots. The module
    global ``started`` is set to the timer value taken after all tasks have
    been scheduled and before they are awaited.
    """
    global started
    limiter = asyncio.Semaphore(10)
    target = "http://127.0.0.1:5000/"

    pending = [asyncio.ensure_future(bound_fetch(limiter, target))
               for _ in range(r)]

    started = timeit.default_timer()
    yield from asyncio.gather(*pending)
def fetch_all(r):
    """Schedule ``r`` bounded fetches of the local GIF and wait for all of them.

    Generator-based coroutine. The shared semaphore has 20 slots. The module
    global ``started`` is set to the timer value taken after all tasks have
    been scheduled and before they are awaited.
    """
    global started
    limiter = asyncio.Semaphore(20)
    # Alternative payload used during experiments:
    # "http://127.0.0.1:5000/images/concept.png"
    target = "http://127.0.0.1:5000/images/gif1.gif"

    pending = [asyncio.ensure_future(bound_fetch(limiter, target))
               for _ in range(r)]

    started = timeit.default_timer()
    yield from asyncio.gather(*pending)
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download every country code concurrently and tally the outcomes.

    Generator-based coroutine. One ``download_one`` job is created per code
    (in sorted order), all sharing a semaphore with ``concur_req`` slots.
    Results are consumed in completion order, behind a tqdm progress bar
    unless ``verbose`` is set.

    Returns a ``collections.Counter`` keyed by status: ``res.status`` for
    successes, ``HTTPStatus.error`` for jobs that raised ``FetchError``.
    """
    tally = collections.Counter()
    limiter = asyncio.Semaphore(concur_req)
    jobs = [download_one(cc, base_url, limiter, verbose)
            for cc in sorted(cc_list)]

    completed = asyncio.as_completed(jobs)
    if not verbose:
        completed = tqdm.tqdm(completed, total=len(cc_list))

    for finished in completed:
        try:
            res = yield from finished
        except FetchError as exc:
            country_code = exc.country_code
            cause = exc.__cause__
            # Prefer the first argument of the underlying error; fall back to
            # its class name when it carries no arguments.
            try:
                error_msg = cause.args[0]
            except IndexError:
                error_msg = cause.__class__.__name__
            if verbose and error_msg:
                print('*** Error for {}: {}'.format(country_code, error_msg))
            status = HTTPStatus.error
        else:
            status = res.status

        tally[status] += 1

    return tally
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Run all downloads under a shared semaphore and count results by status.

    Generator-based coroutine.

    Args:
        cc_list: Country codes; processed in sorted order.
        base_url: Forwarded to ``download_one``.
        verbose: Print per-error messages instead of showing a progress bar.
        concur_req: Number of slots in the shared semaphore.

    Returns:
        ``collections.Counter`` mapping each status to its number of
        occurrences; failures count under ``HTTPStatus.error``.
    """
    status_counts = collections.Counter()
    gate = asyncio.Semaphore(concur_req)

    # One coroutine object per country code.
    downloads = [download_one(code, base_url, gate, verbose)
                 for code in sorted(cc_list)]

    # Iterator that yields futures as they complete.
    in_completion_order = asyncio.as_completed(downloads)
    if not verbose:
        in_completion_order = tqdm.tqdm(in_completion_order, total=len(cc_list))

    for done in in_completion_order:
        try:
            res = yield from done
        except FetchError as exc:
            country_code = exc.country_code
            cause_args = exc.__cause__.args
            # First argument of the original error, or its class name when
            # the error has no arguments.
            if cause_args:
                error_msg = cause_args[0]
            else:
                error_msg = exc.__cause__.__class__.__name__
            if verbose and error_msg:
                msg = '*** Error for {}: {}'
                print(msg.format(country_code, error_msg))
            outcome = HTTPStatus.error
        else:
            outcome = res.status

        status_counts[outcome] += 1

    return status_counts
async def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download every country code concurrently and tally the outcomes.

    Declared ``async`` because the body uses ``await``, which is only valid
    inside a coroutine function.

    Args:
        cc_list: Country codes; processed in sorted order.
        base_url: Forwarded to ``download_one``.
        verbose: Print per-error messages instead of showing a progress bar.
        concur_req: Number of slots in the semaphore shared by all downloads.

    Returns:
        ``collections.Counter`` keyed by status: ``res.status`` for successes
        and ``HTTPStatus.error`` for downloads that raised ``FetchError``.
    """
    counter = collections.Counter()
    # Shared by every download_one call to bound concurrency.
    semaphore = asyncio.Semaphore(concur_req)
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]

    # Iterator that yields futures in completion order.
    to_do_iter = asyncio.as_completed(to_do)
    if not verbose:
        # Wrap it to display progress.
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))

    for future in to_do_iter:
        try:
            res = await future
        except FetchError as exc:
            country_code = exc.country_code
            # Use the first argument of the original exception as the
            # message, or its class name when it has no arguments.
            try:
                error_msg = exc.__cause__.args[0]
            except IndexError:
                error_msg = exc.__cause__.__class__.__name__
            if verbose and error_msg:
                msg = '*** Error for {}: {}'
                print(msg.format(country_code, error_msg))
            status = HTTPStatus.error
        else:
            status = res.status

        counter[status] += 1

    return counter
def __init__(self, host, port, connections, loop=None):
    """Store the listener settings and schedule the server start on the loop.

    All attributes, including ``started_semaphore`` and
    ``startup_exception_info``, are assigned before ``start_server()`` is
    handed to the event loop. The loop may run in another thread, so the
    coroutine could otherwise start executing while the instance is still
    half-initialised.

    Args:
        host: Listening host.
        port: Listening port.
        connections: Dictionary containing all connections.
        loop: Event loop to run on; defaults to ``event_loop.get()``.
    """
    self.host = host  #: Listening host
    self.port = port  #: Listening port
    self.loop = loop or event_loop.get()  #: Event loop onto which the listener is running.
    self.server = None  #: class:`asyncio.Server` instance used by the listener.
    self.connections = connections  #: Dictionary containing all connections.

    # Startup state (presumably used by start_server() to report its outcome;
    # verify against that method). Must exist before scheduling below.
    self.started_semaphore = asyncio.Semaphore(0)
    self.startup_exception_info = None

    asyncio.run_coroutine_threadsafe(self.start_server(), self.loop)
def test_decoder_failure(self):
    """
    This test checks that the decoder works properly whenever random data is
    given to it.

    The coroutine feeds a length prefix plus random bytes to the decoder and
    then waits on a semaphore that only the decoder's error callback
    releases. Anything raised inside the coroutine is forwarded through the
    queue and re-raised in the test thread.

    :return:
    """
    async def run(q):
        try:
            s = asyncio.Semaphore(0)

            def error_callback():
                q.put(None)
                s.release()

            decoder = Decoder(error_callback)
            data = os.urandom(1000)
            await decoder.digest(struct.pack(">I", len(data)))
            await decoder.digest(data)
            # acquire() is a coroutine; it must be awaited, otherwise it never
            # runs and the test does not wait for the error callback.
            await s.acquire()
            q.put(None)
        except BaseException:
            # Forward every failure to the test thread.
            q.put(sys.exc_info())

    q = Queue()
    loop = get_event_loop()
    asyncio.run_coroutine_threadsafe(run(q), loop)
    # If you get an Empty exception over here, it means the co-routine timed out.
    exc_info = q.get(timeout=1)
    if exc_info is not None:
        raise exc_info[1].with_traceback(exc_info[2])
def run(cls):
    """Run the spider: initial parse first, then all parser tasks, then log statistics.

    ``cls.base_url`` is derived from ``cls.start_url`` when unset. All
    parsers share one semaphore sized by ``cls.concurrency``. On
    ``KeyboardInterrupt`` every pending task is cancelled and the loop is run
    until those cancellations have been processed. The summary is always
    logged and the loop is always closed.
    """
    logger.info('Spider started!')
    start_time = datetime.now()
    loop = asyncio.get_event_loop()

    if cls.base_url is None:
        # Raw string: same pattern, without invalid string escape sequences.
        cls.base_url = re.match(r'(http|https)://[\w\-_]+(\.[\w\-_]+)+/', cls.start_url).group()
        logger.info('Base url: {}'.format(cls.base_url))
    try:
        semaphore = asyncio.Semaphore(cls.concurrency)
        # Create the parser coroutines up front, but only schedule them once
        # the initial parse has finished.
        parser_coros = [parser.task(cls, semaphore) for parser in cls.parsers]
        loop.run_until_complete(cls.init_parse(semaphore))
        # asyncio.wait() needs tasks/futures (bare coroutines are not
        # accepted on current Python) and rejects an empty collection.
        parser_tasks = [asyncio.ensure_future(coro, loop=loop) for coro in parser_coros]
        if parser_tasks:
            loop.run_until_complete(asyncio.wait(parser_tasks))
    except KeyboardInterrupt:
        # asyncio.Task.all_tasks no longer exists on current Python; prefer
        # the module-level function when it is available.
        all_tasks = getattr(asyncio, 'all_tasks', None) or asyncio.Task.all_tasks
        pending = [task for task in all_tasks(loop) if not task.done()]
        for task in pending:
            task.cancel()
        # Let the cancellations run to completion instead of spinning the
        # loop forever.
        if pending:
            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
    finally:
        end_time = datetime.now()
        for parser in cls.parsers:
            if parser.item is not None:
                logger.info('Item "{}": {}'.format(parser.item.name, parser.item.count))
        logger.info('Requests count: {}'.format(cls.urls_count))
        logger.info('Error count: {}'.format(len(cls.error_urls)))
        logger.info('Time usage: {}'.format(end_time - start_time))
        logger.info('Spider finished!')
        loop.close()
def __init__(self, max_workers=None, cancel_timeout=10.0):
    # type: (Optional[int], Optional[float]) -> None
    """Create the worker-limiting semaphore and remember the cancel timeout.

    ``max_workers`` defaults to the CPU count and ``cancel_timeout`` to 10.0
    seconds when passed as ``None``.
    """
    worker_limit = multiprocessing.cpu_count() if max_workers is None else max_workers
    self._cancel_timeout = 10.0 if cancel_timeout is None else cancel_timeout
    self._semaphore = asyncio.Semaphore(worker_limit)
async def test_periodic_task(event_loop):
    """The background task repeats until we leave the context manager.

    Declared ``async`` because the body uses ``async with`` and ``await``.
    NOTE(review): presumably collected by an asyncio-aware pytest plugin that
    provides the ``event_loop`` fixture; confirm the marker/decorator in the
    test module.
    """
    sem = asyncio.Semaphore(0, loop=event_loop)

    async def task():
        # Each run of the periodic task signals the test once.
        sem.release()

    with mock.patch('asyncio.sleep') as sleep:
        # Three sleeps that complete, then one future that is never resolved
        # here, so the task stays parked on its fourth sleep.
        sleep.side_effect = [
            make_success(None, loop=event_loop),
            make_success(None, loop=event_loop),
            make_success(None, loop=event_loop),
            asyncio.Future(loop=event_loop),
        ]
        async with PeriodicTask(task, 0.01, loop=event_loop):
            # Wait for three runs of the task.
            await sem.acquire()
            await sem.acquire()
            await sem.acquire()

    assert sleep.call_args_list == [
        mock.call(0.01),
        mock.call(0.01),
        mock.call(0.01),
        mock.call(0.01),
    ]
async def test_periodic_task_respawn_after_crash(event_loop):
    """Background task repeats despite exceptions (which are logged).

    Declared ``async`` because the body uses ``async with`` and ``await``.
    NOTE(review): presumably collected by an asyncio-aware pytest plugin that
    provides the ``event_loop`` fixture; confirm the marker/decorator in the
    test module.
    """
    sem = asyncio.Semaphore(0, loop=event_loop)

    async def task():
        # Signal the test, then fail on purpose.
        sem.release()
        raise Exception('Crash this task!')

    with mock.patch('asyncio.sleep') as sleep:
        # Three sleeps that complete, then one future that is never resolved
        # here, so the task stays parked on its fourth sleep.
        sleep.side_effect = [
            make_success(None, loop=event_loop),
            make_success(None, loop=event_loop),
            make_success(None, loop=event_loop),
            asyncio.Future(loop=event_loop),
        ]
        with testfixtures.LogCapture(level=logging.WARNING) as logs:
            async with PeriodicTask(task, 0.01, loop=event_loop):
                # Wait for three (crashing) runs of the task.
                await sem.acquire()
                await sem.acquire()
                await sem.acquire()

    assert sleep.call_args_list == [
        mock.call(0.01),
        mock.call(0.01),
        mock.call(0.01),
        mock.call(0.01),
    ]
    logs.check(
        ('root', 'ERROR', 'Executing periodic task.'),
        ('root', 'ERROR', 'Executing periodic task.'),
        ('root', 'ERROR', 'Executing periodic task.'),
        ('root', 'ERROR', 'Executing periodic task.'),
    )
def init(sanic, loop):
    """Create the module-level semaphore ``sem`` (4 slots) on the given loop.

    ``sanic`` is accepted but not used by this function.
    """
    global sem
    per_worker_limit = 4
    sem = asyncio.Semaphore(per_worker_limit, loop=loop)
def __init__(self, limit=10, loop=None, coros=None, ignore_empty=False):
    """Initialise the executor state and optionally register ``coros``.

    ``limit`` is converted to ``int`` and negative values are raised to 0;
    the result sizes the semaphore. ``loop`` falls back to the current event
    loop. ``coros`` is added to the pool when ``isiter`` accepts it.
    """
    self.errors = []
    self.running = False
    self.return_exceptions = False

    requested = int(limit)
    self.limit = requested if requested > 0 else 0

    self.pool = deque()
    self.observer = Observer()
    self.ignore_empty = ignore_empty

    if not loop:
        loop = asyncio.get_event_loop()
    self.loop = loop
    self.semaphore = asyncio.Semaphore(self.limit, loop=self.loop)

    # Register coroutines in the pool
    if isiter(coros):
        self.extend(*coros)
def reset(self):
    """
    Reset the executor's internal scheduling state.

    Clears the pool and the observer and replaces the semaphore with a new
    one built from the stored limit and loop.

    Raises:
        RuntimeError: if the executor is still running.
    """
    if self.running:
        raise RuntimeError('paco: executor is still running')

    for stateful in (self.pool, self.observer):
        stateful.clear()
    self.semaphore = asyncio.Semaphore(self.limit, loop=self.loop)
def __init__(self, maxsize=0):
    """Create the limiter semaphore (``maxsize`` slots), the ``empty`` lock and a zero count."""
    slots = asyncio.Semaphore(maxsize)
    empty_lock = asyncio.Lock()
    self.count, self.limiter, self.empty = 0, slots, empty_lock
def __init__(self, maxsize=0):
    """Create the limiter semaphore (``maxsize`` slots), the ``empty`` lock, a zero count and an empty object set."""
    slots = asyncio.Semaphore(maxsize)
    empty_lock = asyncio.Lock()
    self.count, self.limiter, self.empty = 0, slots, empty_lock
    self._objects = set()
def __init__(self, target, **kwargs):
    """Validate the bucket settings and target directory, then set up transfer state.

    Keyword arguments read: ``bucket``, ``region_name`` and ``timeout``.

    Raises:
        RuntimeError: if ``bucket`` or ``region_name`` is missing or falsy.
        FileNotFoundError: if the absolute ``target`` path does not exist or
            is not a directory.
    """
    bucket = kwargs.get('bucket')
    region = kwargs.get('region_name')
    self.bucket, self.region = bucket, region
    if not (bucket and region):
        raise RuntimeError('No bucket and/or region_name kwargs provided')

    self.target = os.path.abspath(target)
    usable = os.path.exists(self.target) and os.path.isdir(self.target)
    if not usable:
        raise FileNotFoundError(
            '{} does not exist or is not a directory.'.format(self.target))

    self.timeout = kwargs.get('timeout')
    self.semaphore = asyncio.Semaphore(MAX_REQUESTS)
    self.progress = 0
    self.total = 0