We extracted the following 50 code examples from open-source Python projects to illustrate how to use lxml.etree.HTML.
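Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: pass an HTML string to etree.HTML and query the resulting element tree with XPath. The markup and variable names below are invented for illustration.

from lxml import etree

# etree.HTML tolerates broken or partial markup and returns the root <html> element.
fragment = '<div><a href="/item/1">first</a><a href="/item/2">second</a></div>'
root = etree.HTML(fragment)

# XPath queries return lists: strings for @attr / text() steps, elements otherwise.
links = root.xpath('//a/@href')   # ['/item/1', '/item/2']
texts = root.xpath('//a/text()')  # ['first', 'second']
print(list(zip(links, texts)))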
def QA_fetch_get_stock_block():
    url_list = ['gn', 'dy', 'thshy', 'zjhhy']  # concept / region / THS industry / CSRC industry blocks
    data = []
    for item in url_list:
        tree = etree.HTML(requests.get(
            'http://q.10jqka.com.cn/{}/'.format(item), headers=headers).text)
        gn = tree.xpath('/html/body/div/div/div/div/div/a/text()')
        gpath = tree.xpath('/html/body/div/div/div/div/div/a/@href')
        for _i in range(len(gn)):
            for i in range(1, 15):
                _data = etree.HTML(requests.get(
                    'http://q.10jqka.com.cn/{}/detail/order/desc/page/{}/ajax/1/code/{}'.format(
                        item, i, gpath[_i].split('/')[-2]),
                    headers=headers).text)
                name = _data.xpath('/html/body/table/tbody/tr/td[3]/a/text()')
                code = _data.xpath('/html/body/table/tbody/tr/td[3]/a/@href')
                for i_ in range(len(name)):
                    print('Now Crawling-{}-{}-{}-{}'.format(
                        gn[_i], code[i_].split('/')[-1], item, 'ths'))
                    data.append([gn[_i], code[i_].split('/')[-1], item, 'ths'])
    return pd.DataFrame(data, columns=['blockname', 'code', 'type', 'source']).set_index('code', drop=False)
def rdoc(num_elements=1000):
    """Randomly generate an invalid HTML document."""
    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
    elements = []
    for i in range(num_elements):
        choice = random.randint(0, 3)
        if choice == 0:
            # New tag.
            tag_name = random.choice(tag_names)
            elements.append("<%s>" % tag_name)
        elif choice == 1:
            elements.append(rsentence(random.randint(1, 4)))
        elif choice == 2:
            # Close a tag.
            tag_name = random.choice(tag_names)
            elements.append("</%s>" % tag_name)
    return "<html>" + "\n".join(elements) + "</html>"
def _get_html(cls, html, url, html_etree, params, **kwargs):
    if html:
        html = etree.HTML(html)
    elif url:
        if not kwargs.get('headers', None):
            kwargs['headers'] = {
                "User-Agent": get_random_user_agent()
            }
        response = requests.get(url, params, **kwargs)
        response.raise_for_status()
        content = response.content
        charset = cchardet.detect(content)
        text = content.decode(charset['encoding'])
        html = etree.HTML(text)
    elif html_etree is not None:
        return html_etree
    else:
        raise ValueError("html(url or html_etree) is expected")
    return html
def parse_page(page, pattern):
    page = etree.HTML(page.lower())
    # page = etree.HTML(page.lower().decode('utf-8'))
    ips = page.xpath(pattern["ip"])
    ports = page.xpath(pattern["port"])
    ty = page.xpath(pattern["type"])
    for i in range(len(ips)):
        ret = {}
        str = "%s:%s"
        ret["ip_port"] = str % (ips[i].text, ports[i].text)
        if ty[i].text.find("https") == -1:
            ret["type"] = 0
        else:
            ret["type"] = 1
        ret["db_flag"] = False
        yield ret
def get_list(self, search_url):
    data = {}
    # keylist = [0] * 5
    data['table_name'] = 'dailyKeyword'
    html = requests.get(search_url, headers=self.headers, verify=False).content
    selector = etree.HTML(html)
    # hot-news keywords and their links
    keyurl = selector.xpath('//div[@class="aside"]/ol[@class="hot-news"]/li/a/@href')
    keyword = selector.xpath('//div[@class="aside"]/ol[@class="hot-news"]/li/a/text()')
    res = {}
    res['keyurl'] = keyurl
    res['keyword'] = keyword
    for x in range(0, 10):
        data['keyword'] = keyword[x]
        data['keyurl'] = keyurl[x]
        data['id'] = (x + 1)
        self.save(data)
    return res
def __init__(self, data=None, response=None, url=None, logFile=None, color=True, debug=4):
    '''
    :param data: default=None <class str|unicode response.text>
    :param response: default=None <class Response>
    :param url: default=None <class str>
    :param logFile: default=None <class str>
    :param color: default=True <class bool>
    :param debug: default=4 <class int|0 NONE,1 [Error],2 [Error][WARNING],3 [Error][WARNING][INFO],4 ALL>
    '''
    self.logFile = logFile
    self.color = color
    self.debug = debug
    self.data = data
    self.response = response
    try:
        self.url = response.request.url if response and not url else url
        self._html = etree.HTML(self.data) if data else None
    except Exception as e:
        printText("[Error]parser.py Parser __init__:%s" % e, logFile=self.logFile, color=self.color, debug=self.debug)
def get_type_id():
    start_url_list = [
        'http://www.autohome.com.cn/a00/',  # mini car
        'http://www.autohome.com.cn/a0/',   # small car
        'http://www.autohome.com.cn/a/',    # compact car
        'http://www.autohome.com.cn/b/',    # mid-size car
        'http://www.autohome.com.cn/c/',    # mid/large car
        'http://www.autohome.com.cn/d/',    # large car
        'http://www.autohome.com.cn/suv/',  # SUV
        'http://www.autohome.com.cn/mpv/',  # MPV
        'http://www.autohome.com.cn/s/',    # sports car
        'http://www.autohome.com.cn/p/',    # pickup
        'http://www.autohome.com.cn/mb/',   # microvan
    ]
    models_list = []
    for url_t in start_url_list:
        model_resp = process_request(url_t)
        model_respose = etree.HTML(model_resp)
        models = model_respose.xpath('.//a/@data-value')
        models_list = models_list + models
    models_list = list(set(models_list))
    return models_list
def set_nasa_wallpaper():
    st = datetime.fromtimestamp(time.time()).strftime('%y%m%d')
    url = URL07.format(st)
    r = requests.get(url)
    if r.status_code == 200:
        try:
            parser = etree.HTMLParser(recover=True)
            html = etree.HTML(r.content, parser)
            images = html.iter('img')
            if images is not None:
                images = list(images)
                if len(images) > 0:
                    image_url = images[0].getparent().attrib['href']
                    image_url = 'https://apod.nasa.gov/' + image_url
                    if download(image_url) is True:
                        set_background(comun.POTD)
        except Exception as e:
            print(e)
def show_body():
    # with open('lianjia_body.txt','r') as fp:
    with open('cq_error.txt', 'r') as fp:
        content = json.loads(fp.read())['body']
    # print content
    tree = etree.HTML(content)
    nodes = tree.xpath('//li[@class="pictext"]')
    for node in nodes:
        xiaoqu_url = node.xpath('.//a[@class="flexbox post_ulog"]/@href')[0]
        name = node.xpath('.//div[@class="item_list"]/div[@class="item_main"]/text()')[0]
        desc = node.xpath('.//div[@class="item_list"]/div[@class="item_other text_cut"]/text()')[0]
        details = desc.split()
        price = node.xpath('.//div[@class="item_list"]/div[@class="item_minor"]/span/em/text()')[0]
        print xiaoqu_url
        print name
        print len(details)
        # print details
        for i in details:
            print i
        print
        # print details[0],details[1],details[2]
        # print price
def get_city_link():
    headers = {'Host': 'm.lianjia.com',
               'User-Agent': 'UCWEB/2.0 (Linux; U; Adr 2.3; zh-CN; MI-ONEPlus) U2/1.0.0 UCBrowser/8.6.0.199 U2/1.0.0 Mobile'}
    url = 'https://m.lianjia.com/city/'
    r = requests.get(url=url, headers=headers)
    contnet = r.text
    # print contnet
    tree = etree.HTML(contnet)
    t1 = tree.xpath('//ul[@class="item_lists"]')[1]
    city_list = []
    for city in t1:
        link = city.xpath('.//a/@href')[0]
        if link == '/sh/':
            continue
        if link == '/su/':
            continue
        if link == '/xsbn/':
            continue
        city_list.append('https://m.lianjia.com' + link)
    return city_list
def debug_page():
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:31.0) Gecko/20100101 Firefox/31.0'
    }
    url = 'http://m.qfang.com/guangzhou/rent/100001468?gardenId=1109818'
    r = requests.get(url=url, headers=headers)
    # r.encoding = 'gbk'
    print r.status_code
    print type(r.content)
    print r.content
    # print chardet.detect(r)
    tree = etree.HTML(r.text, parser=etree.HTMLParser(encoding='utf-8'))
    # print etree.tostring(tree)
    return tree, r.text
def testcase2():
    js = json.loads(open('lianjia_sh.txt').read())
    # print js
    body = js['data']
    tree = etree.HTML(body)
    nodes = tree.xpath('//li[@class="pictext"]')
    print "NODE:", len(nodes)
    print js['args']
    print '*' * 20
    print type(js)
    print type(js['args'])
    # p=re.compile('"cur_city_name":"(.*?)"')
    p = re.compile('"total":(\d+)')
    s = p.findall(js['args'])[0]
    print s
    '''
    print type(s)
    print s
    print s.decode('utf-8').encode('gbk')
    print s.decode('unicode_escape')
    for k,v in js['args'].items():
        print k,"::",v
    '''
def lxml_case2():
    # parse a plain XML fragment with etree.HTML and query it with XPath
    str1 = '''
    <bookstore>
      <book>
        <title>Harry Potter</title>
        <author>J K. Rowling</author>
        <year>2005</year>
        <price>29.99</price>
      </book>
    </bookstore>
    '''
    tree = etree.HTML(str1)
    t1 = tree.xpath('bookstore')
    print t1
def getData(self):
    base_url = 'http://sj.qq.com/myapp/category.htm'
    parent_url = 'http://sj.qq.com/myapp/category.htm?orgame=1'
    s = requests.get(url=parent_url, headers=self.headers)
    print s.status_code
    # print s.text
    tree = etree.HTML(s.text)
    menu = tree.xpath('//ul[@class="menu-junior"]')[0]
    print type(menu)
    link = menu.xpath('.//li[@id]/a/@href')
    catelog = []
    for i in link:
        print i
        p = re.compile('categoryId=(-?\d+)')
        # x=base_url+i
        x = p.findall(i)[0]
        # print x
        catelog.append(x)
    return catelog
def get_list(self, cookies):
    print("Fetching page %s of the list\r\n" % self.page)
    page_r = requests.get(self.targetUrl + "&page=%s" % self.page, cookies=cookies)
    if page_r.status_code == 200:
        if 'window.v=' in page_r.text:
            return 10001
        tree = etree.HTML(page_r.text)
        init_list = tree.xpath('//*[@id="ht-kb"]/article/h3/a')
        list_array = []
        for item in init_list:
            item_link = item.get('href')
            item_text = item.text
            item_array = [item_text, item_link]
            list_array.append(item_array)
        return list_array
    else:
        print("Request failed, retrying in 5 seconds\r\n")
        time.sleep(5)
        return self.get_list(cookies)
def get_proxys(pages=4):
    """Fetch a list of proxies."""
    proxy_list = []
    url = 'http://www.xicidaili.com/wn/'
    headers = generate_http_header()
    headers.update(
        {
            'Referer': 'http://www.xicidaili.com/wn/',
            'Host': 'www.xicidaili.com',
        }
    )
    for page_no in range(1, pages + 1):
        response = requests.get(url=url.format(page_no=page_no), headers=headers)
        html = etree.HTML(response.text)
        ips = html.xpath("//table[@id='ip_list']/tr/td[2]/text()")
        ports = html.xpath("//table[@id='ip_list']/tr/td[3]/text()")
        assert len(ips) == len(ports)
        for (ip, port) in zip(ips, ports):
            proxy_list.append(constants.HTTP_PROXY_FORMATTER.format(ip=ip, port=port))
    return proxy_list
def requests_company_detail_data(company_id):
    """Fetch and parse a company detail page."""
    headers = generate_http_header()
    crawler_sleep()
    try:
        response = requests.get(
            url=constants.COMPANY_DETAIL_URL.format(company_id=company_id),
            headers=headers,
            cookies=Cookies.get_random_cookies(),
            allow_redirects=False,
            timeout=constants.TIMEOUT)
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    html = etree.HTML(response.text)
    advantage = html.xpath('//div[@id="tags_container"]//li/text()')
    size = html.xpath('//div[@id="basic_container"]//li[3]/span/text()')
    address = html.xpath('//p[@class="mlist_li_desc"]/text()')
    introduce = html.xpath('//span[@class="company_content"]//text()')
    return format_tag(advantage, address, size, introduce, company_id)
def requests_job_detail_data(job_id):
    """Fetch and parse a job detail page."""
    headers = generate_http_header()
    crawler_sleep()
    try:
        response = requests.get(
            url=constants.JOB_DETAIL_URL.format(job_id=job_id),
            headers=headers,
            cookies=Cookies.get_random_cookies(),
            allow_redirects=False,
            timeout=constants.TIMEOUT)
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    html = etree.HTML(response.text)
    department = html.xpath('//div[@class="job-name"]/div[@class="company"]/text()')
    description = html.xpath('//dd[@class="job_bt"]/div//text()')
    keywords = html.xpath('//dd[@class="job_request"]//li[@class="labels"]/text()')
    return format_tag(department, description, keywords, job_id)
def index(url='http://music.163.com/discover'):
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 DOL/s_1511_r2x9ak474125_821',
    }
    try:
        r = requests.get(url, headers=headers, timeout=4)
        html = etree.HTML(r.content)
        play_lists = [urlparse.urljoin('http://music.163.com/', link)
                      for link in html.xpath('//*[@id="discover-module"]/div[1]/div/div/div[1]/ul//li/div/a/@href')
                      if link.startswith('/playlist')]
        for url in play_lists:
            app.send_task(
                'tasks.playlist.playlist',
                args=(url, ),
                queue='playlist_queue',
                routing_key='tasks_playlist'
            )
    except:
        print 'request failed'
def playlist(url):
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 DOL/s_1511_r2x9ak474125_821',
    }
    try:
        r = requests.get(url, headers=headers)
        if r.status_code == 200:
            html = etree.HTML(r.content)
            ids = [search(link).group() for link in html.xpath('//a/@href')
                   if link.startswith('/song?id') and search(link)]
            for song_id in ids:
                url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token='.format(song_id)
                app.send_task(
                    'tasks.comment.comment',
                    args=(url, song_id),
                    queue='comment_queue',
                    routing_key='tasks_comment'
                )
                time.sleep(5)
    except:
        print 'request failed'
def parse(self, response):
    # parse the response and yield follow-up links
    if not response:
        return None
    et = etree.HTML(response)
    links = et.xpath("//*[@valign='top'][1]/a/@href")
    urls = []
    for link in links:
        # extract the numeric uid from the profile link
        print link
        uid = re.findall(r"http://weibo\.cn/u/(\w*)", link)
        # skip profiles that use a custom url (e.g. http://weibo.cn/renzhenghao)
        if uid:
            uid = uid[0]
        else:
            continue
        SinaWeiboItem["uid"] = uid
        info_url = "http://weibo.cn/{uid}/info".format(uid=uid)
        Request(info_url, callback=self.parse_info)
        datas = {"uid": SinaWeiboItem["uid"], "name": SinaWeiboItem["name"], "info": SinaWeiboItem["info"]}
        print sina_info.insert(datas)
        urls.append("http://weibo.cn/{uid}/fans".format(uid=uid))
        # follow each user's fans page next
    return urls
def media_by_tag(browser, tag_url, media_url, tag, media_max_likes, media_min_likes):
    # returns list with the 14 'nodes' (posts) for the tag page
    result = {'posts': False, 'tag': tag}
    try:
        explore_site = browser.get(tag_url % (tag))
        tree = etree.HTML(explore_site.text)
        data = return_sharedData(tree)
        if data:
            nodes = data['entry_data']['TagPage'][0]['tag']['media']['nodes']
            result['posts'] = [{'user_id': n['owner']['id'],
                                'username': return_username(browser, media_url, n['code']),
                                'likes': n['likes']['count'],
                                'caption': n['caption'],
                                'media_id': n['id'],
                                'url_code': n['code']}
                               for n in nodes
                               if media_min_likes <= n['likes']['count'] <= media_max_likes
                               if not n['comments_disabled']]
    except Exception as e:
        print '\nError in obtaining media by tag: %s' % (e)
    return result
def parse_main_subjects(self, content):
    '''
    Parse the subject entry urls from the list page.
    :param content: page html content
    :return: a list of subject urls, e.g. ['subject_url_1', 'subject_url_2']
    '''
    try:
        html = etree.HTML(content.lower())
        subject = html.xpath('//ul[@class="img"]/li')
        subject_urls = list()
        for sub in subject:
            a_href = sub[0].get('href')
            subject_urls.append(a_href)
        return subject_urls
    except Exception as e:
        print(str(e))
        return list()
def replace_InvalidTag(Html):
    '''
    Strip unwanted markup (CDATA, comments, style/script blocks and tags) from HTML.
    '''
    re_cdata = re.compile('//<!\[CDATA\[[^>]*//\]\]>', re.I)  # strip CDATA sections
    Html = re_cdata.sub('', Html)
    re_cdata = re.compile('<!\[CDATA\[[^>]*//\]\]>', re.I)  # strip CDATA sections
    Html = re_cdata.sub('', Html)
    re_br = re.compile('<br\s*?/?>')  # turn <br> into newlines
    Html = re_br.sub('\n', Html)
    space_line = re.compile('\s+')  # collapse whitespace
    Html = space_line.sub('', Html)
    re_comment = re.compile('<!--[^>]*-->')  # strip HTML comments
    Html = re_comment.sub('', Html)
    re_style = re.compile('<style\s*[^>]*>(.*?)</style\s*>')
    Html = re_style.sub('', Html)
    re_script = re.compile('<script\s*[^>]*>(.*?)</script>')
    Html = re_script.sub('', Html)
    re_h = re.compile('</?[^>]*>')  # strip remaining html tags
    Html = re_h.sub('', Html)
    return Html
def replace_CharEntity(Html):
    '''
    Replace common HTML character entities with their plain characters;
    entities not in the table are dropped.
    '''
    CHAR_ENTITIES = {'nbsp': ' ', '160': ' ',
                     'lt': '<', '60': '<',
                     'gt': '>', '62': '>',
                     'amp': '&', '38': '&',
                     'quot': '"', '34': '"',
                     }
    re_charEntity = re.compile(r'&#?(?P<name>\w+);')
    sz = re_charEntity.search(Html)
    while sz:
        key = sz.group('name')  # entity name between '&' and ';', e.g. 'gt' for '>'
        try:
            Html = re_charEntity.sub(CHAR_ENTITIES[key], Html, 1)
            sz = re_charEntity.search(Html)
        except KeyError:
            # unknown entity: remove it
            Html = re_charEntity.sub('', Html, 1)
            sz = re_charEntity.search(Html)
    return Html
def extract_meta(html):
    '''
    Extract the content attributes of the page's meta tags,
    keeping only entries that contain Chinese text.
    '''
    if chardet.detect(html)['encoding'] == 'utf-8':
        html = html.decode('utf-8')
    meta_list = []
    # parse the meta content from the html
    page = etree.HTML(html.lower())
    xpath_result = page.xpath(u"//meta/@content")
    for once_xpath_result in xpath_result:
        # keep only values that contain Chinese characters
        if zh_check(once_xpath_result) == True:
            meta_list.append(utf8_transfer(once_xpath_result).decode('utf-8'))
    if meta_list != []:
        return meta_list
    else:
        return False
def validProxy(self):
    """
    url: http://www.66ip.cn/
    """
    url = 'http://www.66ip.cn/areaindex_1/1.html'
    response = requests.get(url=url, headers=self.headers)
    htmlDoc = response.content.decode('gbk')
    htmlTree = etree.HTML(htmlDoc)
    proxy_list = htmlTree.xpath('.//table//tr')
    for proxy in proxy_list:
        proxies = ':'.join(proxy.xpath('./td/text()')[0:2])
        if self.__verifyProxy(proxies):
            if self.__isVaildProxy(proxies):
                return {
                    "https": "https://{proxy}".format(proxy=proxies)
                }
    return None
def get_xml_data(req_string, headers, data=None):
    req = urllib2.Request(req_string, headers=headers)
    html_data = _get_html_data(req, data)
    # Clean chunked data
    html_data = clean_chunked_data(html_data)
    # log_user_action(req.get_host(), 'chunked data', html_data, {})
    try:
        data = etree.fromstring(html_data)
    except XMLSyntaxError:
        # lxml cannot handle encoding declarations :(
        data = etree.HTML(html_data, etree.HTMLParser())
        # data is None when it was not XML, like 404 page without 404 code
        if data is not None:
            data = data.getroottree()
        else:
            raise urllib2.HTTPError(req_string, 404, "Not an XML", None, None)
    # TODO: check valid
    # if not data.find('.//prestashop'):
    #     raise urllib2.HTTPError(req_string, 404, "Not an XML", None, None)
    return data
def MakePoem(word):
    url_base = "http://so.gushiwen.org/search.aspx?value="
    key = word
    url = url_base + key
    res = requests.get(url)
    res.encoding = 'utf-8'
    # print(res.text)
    root = etree.HTML(res.content)
    items = root.xpath('//div[@class="sons"][2]/p[@style="margin-bottom:0px;"]')[0]
    item = items.xpath('string(.)')
    content = item.replace('\n', '').replace(' ', '')
    length = len(content)
    answer = content[:length - 1]
    return answer
    # print(content)
def parse_home(self, home_content):
    if home_content is None:
        return None
    home_content = home_content.encode('ISO-8859-1').decode('gbk')
    html = etree.HTML(home_content, parser=etree.HTMLParser(encoding='utf-8'))
    alinks = html.xpath('//a[@href]')
    # assumes full-width parentheses around the 6-digit fund code (the original pattern literal was garbled)
    pattern_capture = re.compile(ur"（(\d{6})）(.+)")
    l = []
    for alink in alinks:
        aa = alink.text
        if aa != None:
            match = pattern_capture.match(aa)
            if match:
                # fund code and name; only the code is kept
                # l.append((match.group(1), match.group(2)))
                l.append(match.group(1))
    return l
def parse_ratio(self, info, content):
    # content = content.split('"')[1]
    html = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    tds = html.xpath('//td[@class="tor"]')
    if len(tds) > 2:
        # the value may be missing and is then shown as '---'
        # holder-structure page (e.g. http://fund.eastmoney.com/f10/cyrjg_510090.html):
        # institutional plus individual holdings add up to 100%
        insito = tds[0].text
        if insito != '---':
            info.inratio += safe_to_float(insito.split("%")[0])
        # innerto = tds[2].text
        # if innerto != '---':
        #     self.inratio += safe_to_float(innerto.split("%")[0])
        # self.inratio = safe_to_float(.split('%')[0]) + safe_to_float(tds[2].text.split('%')[0])
def parse_stocks(self, info, content):
    html = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    # the page may contain more than one holdings table; only the first is used
    tbs = html.xpath('//table[@class="w782 comm tzxq"]')
    # pers = html.xpath('//table[@class="w782 comm tzxq"]')
    if len(tbs) > 0:
        # take the stock links and ratio cells from the first table
        stocktds = tbs[0].xpath('.//td[@class="tol"]/a')
        pers = tbs[0].xpath('.//td[@class="tor"]')
        # the column layout differs between table variants: offset and stride of the ratio column
        front, interval = 2, 5
        if not '???' in content:
            front, interval = 0, 3
        for (index, stocked) in enumerate(stocktds):
            # info.stocks.append(stocked.text)
            # the 'tor' cell holds the holding ratio for each stock
            per = pers[index * interval + front]
            # skip rows whose ratio is missing ('---')
            if per == '---':
                continue
            # append entries formatted like 'stockname-3.6%';
            # guard against rows with an empty stock name
            stockname = stocked.text
            if not stockname is None and len(stockname) > 0:
                info.stocks.append(stockname + '-' + per.text)
def parse_index_list(self, index_list_content):
    # parse the index list page
    index_list_content = index_list_content.encode('ISO-8859-1').decode('utf-8')
    parsed_content = etree.HTML(index_list_content, parser=etree.HTMLParser(encoding='utf-8'))
    trs = parsed_content.xpath('//tbody/tr')
    indexs = []
    for tr in trs:
        tds = tr.xpath('./td')
        if len(tds) == 5:
            index = IndexInfo()
            code = tds[0].text.strip()
            if len(code.split('.')) == 2:
                index.code = code.split('.')[0]
            index.full_code = code
            index.name = tds[1].text.strip()
            index.begin_time = tds[2].text.strip()
            index.short_name = tds[3].text.strip()
            # the last column holds the methodology, either as a link or as plain text
            weave = tds[4].xpath('./a')
            if len(weave) == 1:
                index.weave = weave[0].attrib['href'].strip()
            else:
                index.weave = tds[4].text.strip()
            indexs.append(index)
    return indexs
def getHtmlTree(url, **kwargs):
    """
    Fetch a url and return the parsed html tree.
    :param url:
    :param kwargs:
    :return:
    """
    header = {'Connection': 'keep-alive',
              'Cache-Control': 'max-age=0',
              'Upgrade-Insecure-Requests': '1',
              'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
              'Accept-Encoding': 'gzip, deflate, sdch',
              'Accept-Language': 'zh-CN,zh;q=0.8',
              }
    # TODO: route these requests through a proxy
    wr = WebRequest()
    # delay 2s per request
    time.sleep(2)
    html = wr.get(url=url, header=header).content
    return etree.HTML(html)
def get_html_tree(url, headers=None, cookie=None, proxy=None):
    if headers is None:
        headers = HEADERS
    try:
        response = requests.get(url=url, headers=headers, cookies=cookie, timeout=10, proxies=proxy)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        html = response.text
        if isinstance(html, bytes):
            html = html.decode("utf-8")
        time.sleep(1)
        return etree.HTML(html)
    except Exception as e:
        log.error("{0}".format(e))
        raise e
def WriteHTML(self, testcaseinfo):
    self.CreateHtmlFile()
    f = open(self.reportfile, "r")
    htmlcontent = f.read()
    f.close()
    # tree = mytree.fromstring(str(htmlcontent))
    htmlcontent.encode('utf-8')
    tree = html.fromstring(htmlcontent)
    tableElem = tree.find(".//table")
    if testcaseinfo.result == "Failed":
        mytablerow = "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td bgcolor=\"#FF0000\">{3}</td><td>{4}</td><td>{5}</td><td>{6}</td><td>{7}</td></tr>".format(
            testcaseinfo.id, testcaseinfo.name, testcaseinfo.owner, testcaseinfo.result,
            testcaseinfo.starttime, testcaseinfo.endtime, testcaseinfo.secondsDuration, testcaseinfo.errorinfo)
    else:
        mytablerow = "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td><td>{7}</td></tr>".format(
            testcaseinfo.id, testcaseinfo.name, testcaseinfo.owner, testcaseinfo.result,
            testcaseinfo.starttime, testcaseinfo.endtime, testcaseinfo.secondsDuration, testcaseinfo.errorinfo)
    tableElem.append(mytree.HTML(str(mytablerow)))
    f = open(self.reportfile, "w")
    # html.tostring
    newContent = repr(html.tostring(tree, method="html", with_tail=False))
    newContent = newContent.replace(r"\n", "").replace(r"\t", "").replace('b\'', "")
    newContent = newContent[:len(newContent) - 1]
    f.write(newContent)
    f.close()