The following 38 code examples, extracted from open-source Python projects, illustrate how to use urlparse.urldefrag().
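Before the project examples, a minimal sketch of the call itself. Under Python 2 (which the `urlparse` module implies; Python 3 moved the function to `urllib.parse`), `urldefrag()` returns a 2-tuple of the defragmented URL and the fragment. The URLs below are made-up illustrations:

import urlparse

url, frag = urlparse.urldefrag('http://example.com/page?q=1#section-2')
assert url == 'http://example.com/page?q=1'
assert frag == 'section-2'

# A URL without '#' yields an empty fragment.
assert urlparse.urldefrag('http://example.com/page') == ('http://example.com/page', '')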
def test_urldefrag(self):
    str_cases = [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]
    def _encode(t):
        return type(t)(x.encode('ascii') for x in t)
    bytes_cases = [_encode(x) for x in str_cases]
    for url, defrag, frag in str_cases + bytes_cases:
        result = urlparse.urldefrag(url)
        self.assertEqual(result.geturl(), url)
        self.assertEqual(result, (defrag, frag))
        self.assertEqual(result.url, defrag)
        self.assertEqual(result.fragment, frag)
def oa_to_standoff(annotations, target_key='target'):
    """Convert OA annotations to Standoff objects."""
    standoffs = []
    for annotation in annotations:
        target = annotation[target_key]
        # assume target is current doc, ignore all but fragment.
        fragment = urlparse.urldefrag(target)[1]
        try:
            start_end = fragment.split('=', 1)[1]
            start, end = start_end.split(',')
        except IndexError:
            warn('failed to parse target %s' % target)
            start, end = 0, 1
        for type_, norm in _parse_body(annotation):
            standoffs.append(Standoff(int(start), int(end), type_, norm))
    return standoffs
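The snippet above assumes the annotation target carries its character offsets in the URL fragment. As a hedged illustration (the `char=` prefix is a hypothetical convention, not shown in the source; only the `key=start,end` shape matters), the parsing steps work like this:

import urlparse

target = 'http://example.org/docs/doc1.txt#char=5,12'   # hypothetical target URL
fragment = urlparse.urldefrag(target)[1]                 # 'char=5,12'
start, end = fragment.split('=', 1)[1].split(',')        # ('5', '12')
assert (int(start), int(end)) == (5, 12)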
def _pre_visit_url_condense(self, url):
    """Reduce (condense) URLs into some canonical form before
    visiting.  All occurrences of equivalent URLs are treated as
    identical.

    All this does is strip the "fragment" component from URLs, so
    that http://foo.com/blah.html#baz becomes
    http://foo.com/blah.html
    """
    base, frag = urlparse.urldefrag(url)
    return base

## URL Filtering functions.  These all use information from the
## state of the Crawler to evaluate whether a given URL should be
## used in some context.  Return value of True indicates that the
## URL should be used.
def remove_fragment(url):
    pure_url, _ = urldefrag(url)
    return pure_url
def normalize(seed_url, link):
    """Normalize this URL by removing hash and adding domain
    """
    link, _ = urlparse.urldefrag(link)  # remove hash to avoid duplicates
    return urlparse.urljoin(seed_url, link)
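A quick sanity check of what this buys a crawler: links that differ only in their fragment collapse to one canonical absolute URL (made-up inputs):

# Both links normalize to the same URL, so the crawler visits it once.
assert normalize('http://example.com/index.html', '/a.html#top') == 'http://example.com/a.html'
assert normalize('http://example.com/index.html', '/a.html#bottom') == 'http://example.com/a.html'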
def getDisplayIdentifier(self):
    """Return the display_identifier if set, else return the claimed_id.
    """
    if self.display_identifier is not None:
        return self.display_identifier
    if self.claimed_id is None:
        return None
    else:
        return urlparse.urldefrag(self.claimed_id)[0]
def normalizeURL(url):
    """Normalize a URL, converting normalization failures to
    DiscoveryFailure"""
    try:
        normalized = urinorm.urinorm(url)
    except ValueError as why:
        raise DiscoveryFailure('Normalizing identifier: %s' % (why[0],), None)
    else:
        return urlparse.urldefrag(normalized)[0]
def test_urldefrag(self):
    for url, defrag, frag in [
        ('http://python.org#frag', 'http://python.org', 'frag'),
        ('http://python.org', 'http://python.org', ''),
        ('http://python.org/#frag', 'http://python.org/', 'frag'),
        ('http://python.org/', 'http://python.org/', ''),
        ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
        ('http://python.org/?q', 'http://python.org/?q', ''),
        ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
        ('http://python.org/p?q', 'http://python.org/p?q', ''),
        (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
        (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
    ]:
        self.assertEqual(urlparse.urldefrag(url), (defrag, frag))
def normalize_url(url):
    # TODO: learn from https://github.com/hypothesis/h/blob/master/h/api/uri.py
    try:
        url = urlnorm.norm(url)
        url, _ = urldefrag(url)
        url = re.sub('[\n\r]', '', url)
        url = url.rstrip('/')
        return url
    except Exception:
        return None
def remove_fragment(url):
    pure_url, frag = urldefrag(url)  # strip the fragment part after '#'
    return pure_url
def remove_fragment(url):
    pure_url, frag = urldefrag(url)
    return pure_url
def _urljoin(base, url):
    """
    Construct a full ("absolute") URL by combining a "base URL" with another
    URL. Informally, this uses components of the base URL, in particular the
    addressing scheme, the network location and (part of) the path, to provide
    missing components in the relative URL.

    Additionally, the fragment identifier is preserved according to the HTTP
    1.1 bis draft.

    @type base: C{bytes}
    @param base: Base URL.

    @type url: C{bytes}
    @param url: URL to combine with C{base}.

    @return: An absolute URL resulting from the combination of C{base} and
        C{url}.

    @see: L{urlparse.urljoin}

    @see: U{https://tools.ietf.org/html/draft-ietf-httpbis-p2-semantics-22#section-7.1.2}
    """
    base, baseFrag = urldefrag(base)
    url, urlFrag = urldefrag(urljoin(base, url))
    return urljoin(url, b'#' + (urlFrag or baseFrag))
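A sketch of the fragment rule the docstring describes, with made-up byte-string inputs: the relative URL's fragment wins, and the base's fragment survives only when the relative URL has none:

# Relative URL has no fragment -> the base fragment is preserved.
assert _urljoin(b'http://example.com/a#base', b'b') == b'http://example.com/b#base'
# Relative URL has its own fragment -> it overrides the base's.
assert _urljoin(b'http://example.com/a#base', b'b#rel') == b'http://example.com/b#rel'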
def _add_link(self, a_link):
    if not self._redis_enable:
        task.url_task.extend(a_link)
        print('Add link to memory')
        return
    self._log.debug("putting url into redis %s " % self.name)
    for a_l in a_link:
        self._r.lpush(self._r.hget(self.name, codes.url),
                      urlparse.urldefrag(a_l)[0])
def handle_link(self):
    # download_url = self._r.hget(self.name, codes.url)
    a_link = [a.get('href') for a in self._soup.find_all('a') if a.get('href')]
    a_link = list(set(a_link))
    b_link = []
    for a in a_link:
        a = urlparse.urldefrag(a)[0]
        if a.startswith('//jandan.net/ooxx') or a.startswith('//wx1.sinaimg.cn'):
            print("Putting %s " % (a))
            # self._r.lpush(download_url, a)
            b_link.append(a)
    self._add_link(b_link)
def defrag(self):
    if "#" in self:
        url, frag = urldefrag(self)
        return URIRef(url)
    else:
        return self
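Sketched usage, assuming rdflib's URIRef (a string subclass, which is why the `"#" in self` test works directly on the URI text):

from rdflib import URIRef

assert URIRef('http://example.org/onto#Person').defrag() == URIRef('http://example.org/onto')
# No fragment: the term is returned unchanged.
assert URIRef('http://example.org/onto').defrag() == URIRef('http://example.org/onto')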
def startElementNS(self, name, qname, attrs):
    stack = self.stack
    stack.append(ElementHandler())
    current = self.current
    parent = self.parent
    base = attrs.get(BASE, None)
    if base is not None:
        base, frag = urldefrag(base)
        if parent and parent.base:
            base = urljoin(parent.base, base)
        else:
            systemId = self.locator.getPublicId() \
                or self.locator.getSystemId()
            if systemId:
                base = urljoin(systemId, base)
    else:
        if parent:
            base = parent.base
        if base is None:
            systemId = self.locator.getPublicId() \
                or self.locator.getSystemId()
            if systemId:
                base, frag = urldefrag(systemId)
    current.base = base
    language = attrs.get(LANG, None)
    if language is None:
        if parent:
            language = parent.language
    current.language = language
    current.start(name, qname, attrs)
def _verifyDiscoverySingle(self, endpoint, to_match):
    """Verify that the given endpoint matches the information
    extracted from the OpenID assertion, and raise an exception if
    there is a mismatch.

    @type endpoint: openid.consumer.discover.OpenIDServiceEndpoint
    @type to_match: openid.consumer.discover.OpenIDServiceEndpoint

    @rtype: NoneType

    @raises ProtocolError: when the endpoint does not match the
        discovered information.
    """
    # Every type URI that's in the to_match endpoint has to be
    # present in the discovered endpoint.
    for type_uri in to_match.type_uris:
        if not endpoint.usesExtension(type_uri):
            raise TypeURIMismatch(type_uri, endpoint)

    # Fragments do not influence discovery, so we can't compare a
    # claimed identifier with a fragment to discovered information.
    defragged_claimed_id, _ = urldefrag(to_match.claimed_id)
    if defragged_claimed_id != endpoint.claimed_id:
        raise ProtocolError(
            'Claimed ID does not match (different subjects!), '
            'Expected %s, got %s' %
            (defragged_claimed_id, endpoint.claimed_id))

    if to_match.getLocalID() != endpoint.getLocalID():
        raise ProtocolError('local_id mismatch. Expected %s, got %s' %
                            (to_match.getLocalID(), endpoint.getLocalID()))

    # If the server URL is None, this must be an OpenID 1
    # response, because op_endpoint is a required parameter in
    # OpenID 2. In that case, we don't actually care what the
    # discovered server_url is, because signature checking or
    # check_auth should take care of that check for us.
    if to_match.server_url is None:
        assert to_match.preferredNamespace() == OPENID1_NS, (
            """The code calling this must ensure that OpenID 2
            responses have a non-none `openid.op_endpoint' and
            that it is set as the `server_url' attribute of the
            `to_match' endpoint.""")
    elif to_match.server_url != endpoint.server_url:
        raise ProtocolError('OP Endpoint mismatch. Expected %s, got %s' %
                            (to_match.server_url, endpoint.server_url))
def handle(self, request, organization, project, team, group_id, event_id):
    try:
        event = Event.objects.get(group=group_id, id=event_id)
    except Event.DoesNotExist:
        return self.redirect(reverse('sentry'))

    Event.objects.bind_nodes([event], 'data')

    interfaces = event.interfaces
    if 'sentry.interfaces.Http' not in interfaces:
        # TODO: show a proper error
        return self.redirect(reverse('sentry'))

    # TODO(mattrobenolt): Add Cookie as a header
    http = interfaces['sentry.interfaces.Http']
    if http.headers:
        headers = '\n'.join('%s: %s' % (k, v) for k, v in http.headers
                            if k[0].upper() == k[0])
    else:
        headers = ''

    if isinstance(http.data, dict):
        data = safe_urlencode(http.data)
    else:
        data = http.data

    initial = {
        'url': urlparse.urldefrag(http.full_url)[0],
        'method': http.method,
        'headers': headers,
        'data': data,
    }

    form = ReplayForm(request.POST or None, initial=initial)
    if form.is_valid():
        result = Replayer(
            url=form.cleaned_data['url'],
            method=form.cleaned_data['method'],
            data=form.cleaned_data['data'],
            headers=form.cleaned_data['headers'],
        ).replay()
    else:
        result = None

    context = {
        'group': event.group,
        'event': event,
        'form': form,
        'result': result,
    }

    return self.respond('sentry/events/replay_request.html', context)
def absolutize(self, uri, defrag=1):
    base = urljoin("file:", pathname2url(os.getcwd()))
    result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
    if defrag:
        result = urldefrag(result)[0]
    if not defrag:
        if uri and uri[-1] == "#" and result[-1] != "#":
            result = "%s#" % result
    return URIRef(result)

# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
#
# * Name start characters must have one of the categories Ll, Lu, Lo,
#   Lt, Nl.
#
# * Name characters other than Name-start characters must have one of
#   the categories Mc, Me, Mn, Lm, or Nd.
#
# * Characters in the compatibility area (i.e. with character code
#   greater than #xF900 and less than #xFFFE) are not allowed in XML
#   names.
#
# * Characters which have a font or compatibility decomposition
#   (i.e. those with a "compatibility formatting tag" in field 5 of the
#   database -- marked by field 5 beginning with a "<") are not allowed.
#
# * The following characters are treated as name-start characters rather
#   than name characters, because the property file classifies them as
#   Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
#
# * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode
#   2.0, section 5.14).
#
# * Character #x00B7 is classified as an extender, because the property
#   list so identifies it.
#
# * Character #x0387 is added as a name character, because #x00B7 is its
#   canonical equivalent.
#
# * Characters ':' and '_' are allowed as name-start characters.
#
# * Characters '-' and '.' are allowed as name characters.