The following 22 code examples, extracted from open-source Python projects, illustrate how to use tornado.gen.
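All of the examples below lean on the same two building blocks: decorating a generator with gen.coroutine so that each yield suspends it until a Future resolves, and raising gen.Return(value) to hand a result back to the caller. Here is a minimal, self-contained sketch of that pattern; the function name fetch_length and the example URL are illustrative only and do not appear in the examples below.

# Minimal sketch of the gen.coroutine pattern used throughout the examples.
from tornado import gen, httpclient, ioloop


@gen.coroutine
def fetch_length(url):
    # `yield` suspends the coroutine until the HTTP fetch completes.
    response = yield httpclient.AsyncHTTPClient().fetch(url)
    # On Python 2 (and older Tornado), a coroutine returns its value by
    # raising gen.Return; on Python 3 a plain `return` also works.
    raise gen.Return(len(response.body))


if __name__ == '__main__':
    size = ioloop.IOLoop.current().run_sync(
        lambda: fetch_length('http://www.tornadoweb.org/en/stable/gen.html'))
    print('page size: %d bytes' % size)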
def get_links_from_url(url): """Download the page at `url` and parse it for links. Returned links have had the fragment after `#` removed, and have been made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes 'http://www.tornadoweb.org/en/stable/gen.html'. """ try: response = yield httpclient.AsyncHTTPClient().fetch(url) print('fetched %s' % url) html = response.body if isinstance(response.body, str) \ else response.body.decode() urls = [urljoin(url, remove_fragment(new_url)) for new_url in get_links(html)] except Exception as e: print('Exception: %s %s' % (e, url)) raise gen.Return([]) raise gen.Return(urls)
def get_links_from_url(url): """Download the page at `url` and parse it for links. Returned links have had the fragment after `#` removed, and have been made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine' becomes 'http://www.tornadoweb.org/en/stable/gen.html'. """ try: response = yield httpclient.AsyncHTTPClient().fetch(url) print('fetched %s' % url) urls = [urlparse.urljoin(url, remove_fragment(new_url)) for new_url in get_links(response.body)] except Exception as e: print('Exception: %s %s' % (e, url)) raise gen.Return([]) raise gen.Return(urls)
def get_links_from_url(url):  # fetch `url` and collect the links it contains
    """Download the page at `url` and parse it for links.

    Returned links have had the fragment after `#` removed, and have been
    made absolute so, e.g. the URL 'gen.html#tornado.gen.coroutine'
    becomes 'http://www.tornadoweb.org/en/stable/gen.html'.
    """
    try:
        response = yield httpclient.AsyncHTTPClient().fetch(url)  # asynchronously fetch the page at `url`
        print('fetched %s' % url)

        html = response.body if isinstance(response.body, str) \
            else response.body.decode()  # decode the body if it arrived as bytes
        urls = [urljoin(url, remove_fragment(new_url))
                for new_url in get_links(html)]  # make every extracted link absolute
    except Exception as e:
        print('Exception: %s %s' % (e, url))
        raise gen.Return([])  # Special exception to return a value from a coroutine.

    raise gen.Return(urls)  # If this exception is raised, its value argument is used as the result of the coroutine.
def get_links(html):  # parse `html` and collect the href of every <a> tag
    class URLSeeker(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)  # initialize the base parser directly rather than via super().__init__()
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    print('@@'*20)
    print(url_seeker.urls)
    print('@@'*20)
    return url_seeker.urls  # return the collected URLs
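The two helpers above are meant to work together: get_links_from_url fetches a page and get_links extracts the anchors from it. A hedged sketch of a driver follows; it assumes get_links_from_url is decorated with @gen.coroutine (as in Tornado's webspider demo) and that the surrounding module defines remove_fragment. The crawl name and root URL are illustrative.

# Illustrative driver for the crawler helpers above.
from tornado import gen, ioloop


@gen.coroutine
def crawl(root_url):
    # Fetch the root page and report every absolute link found on it.
    links = yield get_links_from_url(root_url)
    for link in links:
        print(link)


if __name__ == '__main__':
    ioloop.IOLoop.current().run_sync(
        lambda: crawl('http://www.tornadoweb.org/en/stable/'))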
def handle_stream(self, stream, address):
    """
    handle telnet connection

    http://www.tornadoweb.org/en/stable/gen.html#tornado-gen-simplify-asynchronous-code
    """
    stream.write(TELNET_PROMPT_PREFIX)
    while True:
        try:
            command = yield stream.read_until(b'\n')
            result = self.handle_command(command.decode().strip())
            yield stream.write(result.encode() + TELNET_PROMPT_PREFIX)
        except StreamClosedError:
            break
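handle_stream is meant to live on a tornado.tcpserver.TCPServer subclass. The sketch below shows the surrounding server under that assumption; TELNET_PROMPT_PREFIX and handle_command are illustrative stand-ins for whatever the original module defines.

# Hedged sketch of the server around handle_stream().
from tornado import gen, ioloop
from tornado.iostream import StreamClosedError
from tornado.tcpserver import TCPServer

TELNET_PROMPT_PREFIX = b'\r\n> '


class TelnetServer(TCPServer):
    def handle_command(self, command):
        # Echo the command back; the real server dispatches to command handlers here.
        return 'you said: %s' % command

    @gen.coroutine
    def handle_stream(self, stream, address):
        stream.write(TELNET_PROMPT_PREFIX)
        while True:
            try:
                command = yield stream.read_until(b'\n')
                result = self.handle_command(command.decode().strip())
                yield stream.write(result.encode() + TELNET_PROMPT_PREFIX)
            except StreamClosedError:
                break


if __name__ == '__main__':
    TelnetServer().listen(8023)
    ioloop.IOLoop.current().start()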
def get_links(html):
    class URLSeeker(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    return url_seeker.urls
def get_links(html):
    class URLSeeker(HTMLParser.HTMLParser):
        def __init__(self):
            HTMLParser.HTMLParser.__init__(self)
            self.urls = []

        def handle_starttag(self, tag, attrs):
            href = dict(attrs).get('href')
            if href and tag == 'a':
                self.urls.append(href)

    url_seeker = URLSeeker()
    url_seeker.feed(html)
    return url_seeker.urls
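The two get_links variants differ only in where HTMLParser comes from: the first expects the class imported directly (Python 3's html.parser.HTMLParser), the second the Python 2 HTMLParser module. A small compatibility import, shown here as an assumption about how one might unify them, lets the first variant's spelling run on either interpreter.

# Compatibility shim (assumption: a single get_links() should run on
# both Python 2 and Python 3).
try:
    from HTMLParser import HTMLParser      # Python 2
except ImportError:
    from html.parser import HTMLParser     # Python 3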
def fetch_next(self):
    """A Future used with `gen.coroutine`_ to asynchronously retrieve the
    next document in the result set, fetching a batch of documents from the
    server if necessary. Resolves to ``False`` if there are no more documents,
    otherwise :meth:`next_object` is guaranteed to return a document.

    .. _`gen.coroutine`: http://tornadoweb.org/en/stable/gen.html

    .. testsetup:: fetch_next

      MongoClient().test.test_collection.remove()
      collection = MotorClient().test.test_collection

    .. doctest:: fetch_next

      >>> @gen.coroutine
      ... def f():
      ...     yield collection.insert([{'_id': i} for i in range(5)])
      ...     cursor = collection.find().sort([('_id', 1)])
      ...     while (yield cursor.fetch_next):
      ...         doc = cursor.next_object()
      ...         sys.stdout.write(str(doc['_id']) + ', ')
      ...     print 'done'
      ...
      >>> IOLoop.current().run_sync(f)
      0, 1, 2, 3, 4, done

    .. note:: While it appears that fetch_next retrieves each document from
      the server individually, the cursor actually fetches documents
      efficiently in `large batches`_.

    .. _`large batches`: http://docs.mongodb.org/manual/core/read-operations/#cursor-behaviors
    """
    future = Future()

    if not self._buffer_size() and self.alive:
        if self._empty():
            # Special case, limit of 0
            future.set_result(False)
            return future

        def cb(batch_size, error):
            if error:
                future.set_exception(error)
            else:
                future.set_result(bool(batch_size))

        self._get_more(cb)
        return future
    elif self._buffer_size():
        future.set_result(True)
        return future
    else:
        # Dead
        future.set_result(False)
        return future
def stream_to_handler(self, request_handler):
    """Write the contents of this file to a
    :class:`tornado.web.RequestHandler`. This method calls `flush` on
    the RequestHandler, so ensure all headers have already been set.
    For a more complete example see the implementation of
    :class:`~motor.web.GridFSHandler`.

    Takes an optional callback, or returns a Future.

    :Parameters:
     - `callback`: Optional function taking parameters (self, error)

    .. code-block:: python

        class FileHandler(tornado.web.RequestHandler):
            @tornado.web.asynchronous
            @gen.coroutine
            def get(self, filename):
                db = self.settings['db']
                fs = yield motor.MotorGridFS(db()).open()
                try:
                    gridout = yield fs.get_last_version(filename)
                except gridfs.NoFile:
                    raise tornado.web.HTTPError(404)

                self.set_header("Content-Type", gridout.content_type)
                self.set_header("Content-Length", gridout.length)
                yield gridout.stream_to_handler(self)
                self.finish()

    .. seealso:: Tornado `RequestHandler
        <http://tornadoweb.org/en/stable/web.html#request-handlers>`_
    """
    written = 0
    while written < self.length:
        # Reading chunk_size at a time minimizes buffering
        chunk = yield self.read(self.chunk_size)

        # write() simply appends the output to a list; flush() sends it
        # over the network and minimizes buffering in the handler.
        request_handler.write(chunk)
        request_handler.flush()
        written += len(chunk)