From efa41ce85e63982713a75c3f56ba5f85e750fac4 Mon Sep 17 00:00:00 2001 From: Callum McIntyre Date: Wed, 21 Mar 2018 13:46:59 +0000 Subject: [PATCH 01/14] Improve timeout handling - Change timeout to 60s (5 mins is too long) - Catch exceptions when fetching the image and skip (better to get all images except 1 than crash) Signed-off-by: Callum McIntyre --- ragelib/html_body_writer.py | 4 +++- ragelib/image_fetcher.py | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ragelib/html_body_writer.py b/ragelib/html_body_writer.py index 7e908d3..736d021 100644 --- a/ragelib/html_body_writer.py +++ b/ragelib/html_body_writer.py @@ -11,7 +11,9 @@ def join_cells(tds): @staticmethod def make_image_element(image_bytes): - return f"" + if image_bytes: + return f"" + return "" def make_table_element(self, heading_cells, data_cells): diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 1992538..38cdf78 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -18,7 +18,7 @@ def __init__(self, data, geckodriver_path, logger): def get_graph_screenshot(self, url, driver): - wait = WebDriverWait(driver, timeout=300) + wait = WebDriverWait(driver, timeout=60) self.logger.debug(f"Begun fetching url {url}") driver.get(url) @@ -36,7 +36,12 @@ def get_graph_screenshot(self, url, driver): def fetch_images(self): for item in tqdm(self.data, desc='Fetching graphs...'): - item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) + try: + item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) + except Exception as e: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + print(e) + item['graph_bytes'] = None self.driver.quit() return self.data \ No newline at end of file From b97d4928b4511c951a24bfc1ae2effd248d6e6ab Mon Sep 17 00:00:00 2001 From: mcintyre1994 Date: Sun, 25 Mar 2018 21:55:06 +0100 Subject: [PATCH 02/14] Update URLs for perf101 --- README.md | 2 +- 
setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d66074c..1ab030e 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ One of the key features of ragelib is the ability to fetch a rendered graph from Recommended use is with pipenv (`pip install pipenv`): ``` -pipenv install -e git+https://github.com/mcintyre94/ragelib.git#egg=ragelib +pipenv install -e git+https://github.com/perf101/ragelib.git#egg=ragelib ``` ## brief_parser diff --git a/setup.py b/setup.py index 08b4a89..c899a39 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ # Package meta-data. NAME = 'ragelib' DESCRIPTION = 'A helper library for RAGE' -URL = 'https://github.com/mcintyre94/ragelib' # TODO: move to github! +URL = 'https://github.com/perf101/ragelib' EMAIL = 'callum.mcintyre@citrix.com' AUTHOR = 'Callum McIntyre' REQUIRES_PYTHON = '>=3.6.0' From 5f0bf11bc39cd951cf1a090bc1d9df818550f164 Mon Sep 17 00:00:00 2001 From: Callum McIntyre Date: Wed, 9 May 2018 11:59:17 +0100 Subject: [PATCH 03/14] Add the context to data Context includes useful data about how the test ran - the soms, machines used, VMs used, and other test parameters. This is useful for differentiating between tests with the same/similar names, and without it we need to click through to the RAGE graph to determine how tests differed. ragelib now passes context of each row for consumers to use however suits them best. 
Signed-off-by: Callum McIntyre --- ragelib/report_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ragelib/report_parser.py b/ragelib/report_parser.py index 5a6f588..9074217 100644 --- a/ragelib/report_parser.py +++ b/ragelib/report_parser.py @@ -5,6 +5,11 @@ def __init__(self, report_html, logger): self.soup = BeautifulSoup(report_html, 'html.parser') self.logger = logger + @staticmethod + def get_context(tr): + # Get the context from a row + return tr('td')[0].text.strip() + @staticmethod def get_description(tr): # Get the description from a row @@ -40,6 +45,7 @@ def parse_data(self): self.logger.warning(f"Report contains {len(data_rows) - len(visible_rows)} hidden rows. Freezing the brief report before saving it will make parsing faster.") data = [{ + 'context': self.get_context(row), 'title': self.get_description(row), 'graph_link': self.get_graph_link(row), 'tds': self.get_data_tds(row) From 1f9d621d0763c4c43dfd7433e282635753077c93 Mon Sep 17 00:00:00 2001 From: Patrick Cooke Date: Mon, 8 Nov 2021 15:42:13 +0000 Subject: [PATCH 04/14] Added handling of alert box popup when rage tries to graph with a large number of points --- ragelib/image_fetcher.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 38cdf78..86cf862 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -1,3 +1,4 @@ +from selenium.common.exceptions import TimeoutException from selenium.webdriver import Firefox from selenium.webdriver.common.by import By from selenium.webdriver.firefox.options import Options @@ -16,15 +17,27 @@ def __init__(self, data, geckodriver_path, logger): logger.info(f"Using {geckodriver_path} for geckodriver") self.driver = Firefox(executable_path=geckodriver_path, firefox_options=options) - def get_graph_screenshot(self, url, driver): wait = WebDriverWait(driver, timeout=60) self.logger.debug(f"Begun fetching url {url}") 
driver.get(url) - - # On draw starting the progress_img element appears (spinner). When done it disappears. - wait.until(expected.visibility_of_element_located((By.ID, 'progress_img'))) - self.logger.debug("Element #progress_img now visible, graph loading") + + try: + WebDriverWait(driver, timeout=10).until(expected.alert_is_present()) + alert = driver.switch_to.alert + if "About to plot" in alert.text: + alert.accept() + self.logger.warn("Many points alert box accepted") + else: + alert.dismiss() + self.logger.warn("Unexpected alert box dismissed") + except TimeoutException: + self.logger.debug("No alert box present") + + # On draw starting the progress_img element appears (spinner) and the graph title is unhidden. + # When done it disappears. + wait.until(expected.visibility_of_element_located((By.ID, 'graph_title'))) + self.logger.debug("Element #graph_title now visible, graph loading") wait.until(expected.invisibility_of_element_located((By.ID, 'progress_img'))) self.logger.debug("Element #progress_img now invisible, graph loaded") @@ -33,14 +46,19 @@ def get_graph_screenshot(self, url, driver): return canvas_bytes - def fetch_images(self): for item in tqdm(self.data, desc='Fetching graphs...'): try: item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) + item['graph_exception'] = None + except TimeoutException: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + item['graph_exception'] = "TimeoutException" + item['graph_bytes'] = None except Exception as e: self.logger.warn("Failed while fetching image for link "+item['graph_link']) print(e) + item['graph_exception'] = str(e) item['graph_bytes'] = None self.driver.quit() From 69db96ccdcca8597be7253850ffe861938c08d33 Mon Sep 17 00:00:00 2001 From: YanRachel Date: Fri, 7 Oct 2022 12:33:59 +0100 Subject: [PATCH 05/14] Reorder element checking as to avoid 10s wait for dialogue pop-up --- ragelib/image_fetcher.py | 10 ++++------ 1 file changed, 4 
insertions(+), 6 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 86cf862..a178743 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -22,8 +22,10 @@ def get_graph_screenshot(self, url, driver): self.logger.debug(f"Begun fetching url {url}") driver.get(url) - try: - WebDriverWait(driver, timeout=10).until(expected.alert_is_present()) + wait.until(expected.visibility_of((By.ID, 'graph_title'))) + self.logger.debug("Element #graph_title now visible, graph loading") + + if expected.alert_is_present(): alert = driver.switch_to.alert if "About to plot" in alert.text: alert.accept() @@ -31,13 +33,9 @@ def get_graph_screenshot(self, url, driver): else: alert.dismiss() self.logger.warn("Unexpected alert box dismissed") - except TimeoutException: - self.logger.debug("No alert box present") # On draw starting the progress_img element appears (spinner) and the graph title is unhidden. # When done it disappears. - wait.until(expected.visibility_of_element_located((By.ID, 'graph_title'))) - self.logger.debug("Element #graph_title now visible, graph loading") wait.until(expected.invisibility_of_element_located((By.ID, 'progress_img'))) self.logger.debug("Element #progress_img now invisible, graph loaded") From 0725897886d716c27099f72d4cfa5a0878fae6bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 31 Oct 2022 17:25:23 +0000 Subject: [PATCH 06/14] ragelib: use the correct visibility_of API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 126 +++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index a178743..a620af5 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -1,63 +1,63 @@ -from selenium.common.exceptions import TimeoutException -from selenium.webdriver import Firefox 
-from selenium.webdriver.common.by import By -from selenium.webdriver.firefox.options import Options -from selenium.webdriver.support import expected_conditions as expected -from selenium.webdriver.support.wait import WebDriverWait -from tqdm import tqdm - -class ImageFetcher(): - def __init__(self, data, geckodriver_path, logger): - self.data = data - self.logger = logger - - options = Options() - options.add_argument('-headless') - - logger.info(f"Using {geckodriver_path} for geckodriver") - self.driver = Firefox(executable_path=geckodriver_path, firefox_options=options) - - def get_graph_screenshot(self, url, driver): - wait = WebDriverWait(driver, timeout=60) - self.logger.debug(f"Begun fetching url {url}") - driver.get(url) - - wait.until(expected.visibility_of((By.ID, 'graph_title'))) - self.logger.debug("Element #graph_title now visible, graph loading") - - if expected.alert_is_present(): - alert = driver.switch_to.alert - if "About to plot" in alert.text: - alert.accept() - self.logger.warn("Many points alert box accepted") - else: - alert.dismiss() - self.logger.warn("Unexpected alert box dismissed") - - # On draw starting the progress_img element appears (spinner) and the graph title is unhidden. - # When done it disappears. 
- wait.until(expected.invisibility_of_element_located((By.ID, 'progress_img'))) - self.logger.debug("Element #progress_img now invisible, graph loaded") - - canvas = driver.find_element_by_tag_name('canvas') - canvas_bytes = canvas.screenshot_as_base64 - - return canvas_bytes - - def fetch_images(self): - for item in tqdm(self.data, desc='Fetching graphs...'): - try: - item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) - item['graph_exception'] = None - except TimeoutException: - self.logger.warn("Failed while fetching image for link "+item['graph_link']) - item['graph_exception'] = "TimeoutException" - item['graph_bytes'] = None - except Exception as e: - self.logger.warn("Failed while fetching image for link "+item['graph_link']) - print(e) - item['graph_exception'] = str(e) - item['graph_bytes'] = None - - self.driver.quit() - return self.data \ No newline at end of file +from selenium.common.exceptions import TimeoutException +from selenium.webdriver import Firefox +from selenium.webdriver.common.by import By +from selenium.webdriver.firefox.options import Options +from selenium.webdriver.support import expected_conditions as expected +from selenium.webdriver.support.wait import WebDriverWait +from tqdm import tqdm + +class ImageFetcher(): + def __init__(self, data, geckodriver_path, logger): + self.data = data + self.logger = logger + + options = Options() + options.add_argument('-headless') + + logger.info(f"Using {geckodriver_path} for geckodriver") + self.driver = Firefox(executable_path=geckodriver_path, firefox_options=options) + + def get_graph_screenshot(self, url, driver): + wait = WebDriverWait(driver, timeout=60) + self.logger.debug(f"Begun fetching url {url}") + driver.get(url) + + wait.until(expected.visibility_of_element_located((By.ID, 'graph_title'))) + self.logger.debug("Element #graph_title now visible, graph loading") + + if expected.alert_is_present(): + alert = driver.switch_to.alert + if "About to plot" in 
alert.text: + alert.accept() + self.logger.warn("Many points alert box accepted") + else: + alert.dismiss() + self.logger.warn("Unexpected alert box dismissed") + + # On draw starting the progress_img element appears (spinner) and the graph title is unhidden. + # When done it disappears. + wait.until(expected.invisibility_of_element_located((By.ID, 'progress_img'))) + self.logger.debug("Element #progress_img now invisible, graph loaded") + + canvas = driver.find_element_by_tag_name('canvas') + canvas_bytes = canvas.screenshot_as_base64 + + return canvas_bytes + + def fetch_images(self): + for item in tqdm(self.data, desc='Fetching graphs...'): + try: + item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) + item['graph_exception'] = None + except TimeoutException: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + item['graph_exception'] = "TimeoutException" + item['graph_bytes'] = None + except Exception as e: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + print(e) + item['graph_exception'] = str(e) + item['graph_bytes'] = None + + self.driver.quit() + return self.data From 72b41e949f8b61be45495a84d6b2665de2c19377 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 9 Nov 2022 14:09:15 +0000 Subject: [PATCH 07/14] Package library in a standard way To prepare the library to work normally with pip install we: - define ragelib as a regular python package - move to setup.py and requirements.txt to pyproject.toml and setup.cfg We remove Pipfile because it's only useful for locking library versions, only that should only be done on non-library packages as these may cause conflicts otherwise. 
If there are any bounds these can be defined in the dependencies Signed-off-by: Pau Ruiz Safont --- Pipfile | 16 ----------- Pipfile.lock | 60 ----------------------------------------- pyproject.toml | 3 +++ ragelib/__init__.py | 0 requirements.txt | Bin 1406 -> 0 bytes setup.cfg | 33 +++++++++++++++++++++++ setup.py | 64 ++------------------------------------------ 7 files changed, 38 insertions(+), 138 deletions(-) delete mode 100644 Pipfile delete mode 100644 Pipfile.lock create mode 100644 pyproject.toml create mode 100644 ragelib/__init__.py delete mode 100644 requirements.txt create mode 100644 setup.cfg diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 31c937c..0000000 --- a/Pipfile +++ /dev/null @@ -1,16 +0,0 @@ -[[source]] - -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - - -[packages] - -selenium = "*" -"bs4" = "*" -tqdm = "*" - - -[dev-packages] - diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index ebf09c5..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,60 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "3faf7d32d7fea5206cf6ec73ba7e5af6ac8630cfec7aab432ba084e9dcc9f27a" - }, - "host-environment-markers": { - "implementation_name": "cpython", - "implementation_version": "3.6.1", - "os_name": "nt", - "platform_machine": "AMD64", - "platform_python_implementation": "CPython", - "platform_release": "10", - "platform_system": "Windows", - "platform_version": "10.0.16299", - "python_full_version": "3.6.1", - "python_version": "3.6", - "sys_platform": "win32" - }, - "pipfile-spec": 6, - "requires": {}, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.python.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "beautifulsoup4": { - "hashes": [ - "sha256:7015e76bf32f1f574636c4288399a6de66ce08fb7b2457f628a8d70c0fbabb11", - "sha256:11a9a27b7d3bddc6d86f59fb76afb70e921a25ac2d6cc55b40d072bd68435a76", - "sha256:808b6ac932dccb0a4126558f7dfdcf41710dd44a4ef497a0bb59a77f9f078e89" - ], - 
"version": "==4.6.0" - }, - "bs4": { - "hashes": [ - "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" - ], - "version": "==0.0.1" - }, - "selenium": { - "hashes": [ - "sha256:b0a06afa31a80d7dcb627eafb62776488b20bdaffcd807ac4158fadbc11061f4", - "sha256:a34a833d89bcfb463bfba5e5515a9276bb94221787b409f0ad28d2f91903e31d" - ], - "version": "==3.9.0" - }, - "tqdm": { - "hashes": [ - "sha256:f66468c14ccd011a627734c9b3fd72f20ce16f8faecc47384eb2507af5924fb9", - "sha256:5ec0d4442358e55cdb4a0471d04c6c831518fd8837f259db5537d90feab380df" - ], - "version": "==4.19.6" - } - }, - "develop": {} -} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4e50ea7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools >= 38.6.0", "wheel"] +build-backend = "setuptools.build_meta:__legacy__" diff --git a/ragelib/__init__.py b/ragelib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e62c9015a60c16179a7852010d20808a1c9fd574..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1406 zcma)+TaMH)5Jl@7i9N6ZMoG6j=^)Kw;v^PG!%IE0ec+rj{B|SDiDTE}`qr)d{#E0M zHQwSizC`2w61Vuy`$v>`W|Vk}i8;iF_{jK-^^NCqKG!(Kh4W7_V==NmSWQuhb>^(G z*VvtjGqE$p#_AMT#yJ+o%HEFGlxGJ~u6R^DDt0A(8)y2g%sn%0*lD#gD>+8c-N&cR zn)yx{dnVEu-<`1~hAU68E`Re+*mooY_!bZt!TYXwobdBD8{Ze=6cFvqnG>TyXMU+mPTLjG=uRgCo8c=cBafGL*j2~(eiBOTm(B`CG8K+3z25N2BK7a<$Go{ zR*l^mlm~N(_wf#k8y;4D$cjG^^FFIq^Z=t8veH(kb7F<$^++_H^?Vm>THa&_kBC)3 z!OxmsxVLOs@6PTBI-{HzGN5u)jMZBc^^MaP<~J1l7GD$p8T`wCsJ3%E5Ybw%#I(lN z!kfsy6;UzXP&M=~F_y%lN@dr3c*l*E{<5yt?Fs9(vR~Ozhvt)!@=z5bvYK|wIU_F_ zMI^nW>@@qp&QpozQ|Dr~4=ZasPWG&}Jge1Se^JS)Pc<lvIu{>l}zSL;%7a6iw?FLyCsXW;^W|Rw8Dtg08cH~-Q*4rJsN<2&Y8pXYxZ~s;5 L$2)dywPf}WyfDU5 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6654ae8 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,33 @@ +[metadata] +name = ragelib +description = A 
helper library for RAGE +long_description = file: README.md +long-description-content-type = text/markdown; charset=UTF-8 +author = Xenserver +license = BSD-3-Clause +url = https://github.com/perf101/ragelib +project_urls = + Bug Tracker = https://github.com/perf101/ragelibissues + Source Code = https://github.com/perf101/ragelib +classifiers = + License :: OSI Approved :: BSD License + Development Status :: 6 - Mature + Intended Audience :: Developers + Programming Language :: Python :: 3 + Topic :: System :: Systems Administration + Topic :: Software Development :: Libraries :: Python Modules + +[options] +packages = find: +install_requires = + selenium >= 4.6.0 + bs4 + tqdm +python_requires = >=3.6, <4 + +[bdist_wheel] +# This flag says that the code is written to work on both Python 2 and Python +# 3. If at all possible, it is good practice to do this. If you cannot, you +# will need to generate wheels for each Python version that you support. +universal=1 + diff --git a/setup.py b/setup.py index c899a39..b26e383 100644 --- a/setup.py +++ b/setup.py @@ -1,63 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- +from setuptools import setup -# Note: To use the 'upload' functionality of this file, you must: -# $ pip install twine - -import io -import os -import sys -from shutil import rmtree - -from setuptools import find_packages, setup, Command - -# Package meta-data. -NAME = 'ragelib' -DESCRIPTION = 'A helper library for RAGE' -URL = 'https://github.com/perf101/ragelib' -EMAIL = 'callum.mcintyre@citrix.com' -AUTHOR = 'Callum McIntyre' -REQUIRES_PYTHON = '>=3.6.0' - -# What packages are required for this module to be executed? -REQUIRED = [ - 'selenium', 'bs4', 'tqdm' -] - -# The rest you shouldn't have to touch too much :) -# ------------------------------------------------ -# Except, perhaps the License and Trove Classifiers! -# If you do change the License, remember to change the Trove Classifier for that! 
- -here = os.path.abspath(os.path.dirname(__file__)) - -# Import the README and use it as the long-description. -# Note: this will only work if 'README.rst' is present in your MANIFEST.in file! -with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() - -# Where the magic happens: -setup( - name=NAME, - use_scm_version=True, - description=DESCRIPTION, - long_description=long_description, - author=AUTHOR, - author_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - packages=find_packages(exclude=('tests',)), - install_requires=REQUIRED, - include_package_data=True, - license='BSD-3-Clause', - classifiers=[ - # Trove classifiers - # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy' - ] -) +setup() From 0a841cd1e5dfe1638a3cb5f70af83b63bc152ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 21 Feb 2024 13:00:34 +0000 Subject: [PATCH 08/14] Fix race condition on alert presence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `expected.alert_is_present()` is always true, because it is a Selenium predicate (a `Callable` that expects a web-driver). This currently raises a NoSuchAlertException. We have to actually call the expectation with a driver, which will check if an alert is present, or return false (it internally catches this exception), instead of calling switch_to ourselves. 
Fixes: 69db96c ("Reorder element checking as to avoid 10s wait for dialogue pop-up") Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index a620af5..cf67ef9 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -25,8 +25,8 @@ def get_graph_screenshot(self, url, driver): wait.until(expected.visibility_of_element_located((By.ID, 'graph_title'))) self.logger.debug("Element #graph_title now visible, graph loading") - if expected.alert_is_present(): - alert = driver.switch_to.alert + alert = expected.alert_is_present()(driver) + if alert: if "About to plot" in alert.text: alert.accept() self.logger.warn("Many points alert box accepted") From e5a31a3101824211d384e6b36665262b085233ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 21 Feb 2024 15:34:09 +0000 Subject: [PATCH 09/14] Update to Selenium 4.18.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Selenium has broken backwards compatibility with 3.9.0: * executable_path is no longer valid, now a service parameter needs to be used * firefox_options got renamed to option Add an upper bound on major version to avoid the library breaking again. 
Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 6 ++++-- setup.cfg | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index a620af5..b364263 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -2,6 +2,7 @@ from selenium.webdriver import Firefox from selenium.webdriver.common.by import By from selenium.webdriver.firefox.options import Options +from selenium.webdriver.firefox.service import Service from selenium.webdriver.support import expected_conditions as expected from selenium.webdriver.support.wait import WebDriverWait from tqdm import tqdm @@ -15,7 +16,8 @@ def __init__(self, data, geckodriver_path, logger): options.add_argument('-headless') logger.info(f"Using {geckodriver_path} for geckodriver") - self.driver = Firefox(executable_path=geckodriver_path, firefox_options=options) + serv = Service(geckodriver_path) + self.driver = Firefox(service=serv, options=options) def get_graph_screenshot(self, url, driver): wait = WebDriverWait(driver, timeout=60) @@ -39,7 +41,7 @@ def get_graph_screenshot(self, url, driver): wait.until(expected.invisibility_of_element_located((By.ID, 'progress_img'))) self.logger.debug("Element #progress_img now invisible, graph loaded") - canvas = driver.find_element_by_tag_name('canvas') + canvas = driver.find_element(By.TAG_NAME, 'canvas') canvas_bytes = canvas.screenshot_as_base64 return canvas_bytes diff --git a/setup.cfg b/setup.cfg index 6654ae8..22a05b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,7 @@ classifiers = [options] packages = find: install_requires = - selenium >= 4.6.0 + selenium >= 4.18.1, <5 bs4 tqdm python_requires = >=3.6, <4 From 1874f803e77f99d420bddff829340abc7682e6d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 21 Feb 2024 15:38:00 +0000 Subject: [PATCH 10/14] setup.cfg: fix typo --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg 
b/setup.cfg index 22a05b0..72e262c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ author = Xenserver license = BSD-3-Clause url = https://github.com/perf101/ragelib project_urls = - Bug Tracker = https://github.com/perf101/ragelibissues + Bug Tracker = https://github.com/perf101/ragelib/issues Source Code = https://github.com/perf101/ragelib classifiers = License :: OSI Approved :: BSD License From 13dec0f7feede421fab39c9baa5b2bd66be5f57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 21 Feb 2024 15:44:32 +0000 Subject: [PATCH 11/14] Delete __pycache__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ragelib/__pycache__/__init__.cpython-36.pyc | Bin 143 -> 0 bytes ragelib/__pycache__/body_writer.cpython-36.pyc | Bin 1936 -> 0 bytes ragelib/__pycache__/image_fetcher.cpython-36.pyc | Bin 1902 -> 0 bytes ragelib/__pycache__/report_parser.cpython-36.pyc | Bin 1938 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 ragelib/__pycache__/__init__.cpython-36.pyc delete mode 100644 ragelib/__pycache__/body_writer.cpython-36.pyc delete mode 100644 ragelib/__pycache__/image_fetcher.cpython-36.pyc delete mode 100644 ragelib/__pycache__/report_parser.cpython-36.pyc diff --git a/ragelib/__pycache__/__init__.cpython-36.pyc b/ragelib/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 1d7a94c31b8d9487cb3d68e31c50a207223669c8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmXr!<>gv*ZCVrq5IhDEFu(|8H~?`m3y?@*2xib^^jpbL1QJFNzigbXVnT~ki;82C z6LWG(b2Ah3QWA4xij#{HOOi8Uic$;mi%Q~wyz-pXqL|{M?d|>dcl4`6$p2)i z*&u%nUG2m0m~g`B74(@E5pEq5-%jBBqAj!^!6!i?%1LB#%B?FB*}~yAcdp2cMm3OY zyl&(=-{CIo@9+lqV0L+vw_tAYHk`j(?mqeHhwr}~^UGgks)bC@2R587=ntW*Js75R zN*I}1FoIvOr9Iy&+dS4W8;Y2xqk$?NT>5tDD3P5?w2$_T#zm@!EY0JAQ2!EHgN>J8 zJ-pw4s)SViB+jz)JdH;@&ig8nu}+44DJEm78LTccA^S=u{Sye2U6?qJCYPnlSUO5I 
zW9Uc0)e=JMw0Lh7(a~Hhp{M~3XM|^xzz9Lid2rh6&0iM$<$-)FZpo3%R z3QY!}`djM-)o?E}`j_?GIj&&&w6v9$a5dJ^-N@41fUSl)7{2s^ELA!g=aaC3r{U3c zQhc@+_1QQbu|#Bi=Em|KDu2Q|=}=4#DH>Aki_pT-BVi-R}%SO8tQn-)T|tv^eMbbH5kg!waCjVxUR&nJph37Hq+7# Ls+dA!4_f3EB5Sr` diff --git a/ragelib/__pycache__/image_fetcher.cpython-36.pyc b/ragelib/__pycache__/image_fetcher.cpython-36.pyc deleted file mode 100644 index 6e1c9b4f0db2ebf331caaedf0480fb999edac900..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1902 zcmZ`(OK%%D5GJ_~ttGi}WT!}s7FJ&e0j|)ZMGr*~xQUx07xz#E1_%YjYDukE*@sMW zZA(}K1(ZWCzV;I22lNm1u-BgY7kcWDDrl9J4!j^&-y%{=N^-c9|iMFY!w=|cq(*sB4p&kK{GsTtY-AXU!P<)k975qI7= zMxwonR4#X~2Jxfr(gceYs~ELvmxns6{j`WWsp^VE+c$!L_AaI#4g&*5xd2aF|oq8T&ysxA+DmD(4`0m?*FF!E09f*|N1g>_^p{V6OBkVR_u<=|AWntKmp3&O@ zKbQ>^duMnDc9bCqcX1E5oe$v;le7V6i%;G|*iGjn$fq5qUf!1dBUoJQ1Kt2LriomU zL-b$<`krkq1{L47I(aJj8ixofy4MyDk-K4L=$1+GQ*-{^EL1OaeRgX#-7r~)7e@8DgC z#p1zxkyzI~8u+GAO~x3j+l*xeFQNaiy35#U8K%vS+%}J~FWNJl=@DzhWJsDD%~S@1 z0S-FBT`M~pY|2)<+)6Tv=7!*jrpl)C=cvY1;nnwR_ad^W_@)pM!u z5v)j(Q9;Tn2)R;EXJ37J?j#j+Z+icSYpou2>U2z(!&nEXFQ=B4@R>-xu0Q zJw)yJ;1}P=@S1xtShApkF4=%$tK@QI_+mDk@P-56n;(b~rh>I0mzmQtAVv zhpVO5>R|TVMR~3&W%E1$g$4&gW7^!k?%iO88B*Mb2fs+zyZJT<3rNrsg1JSG$Przl zi+s;9E6x#`G8b6+lXao7TFoF)%xdpKJFEf5-5E=p3kl@a%&NLV4-Fb2yxG1%=N9nw ztTy1_0@$p0wTx(y1rsetKp_~ZN~BcZ0pM*e8)#yzi`gVERdw74$coCtSEyX!ZZNql zgH6w*@QV~6YX1I zFc|oJonu|-cVW167FV!*_u(}k!7zsO|G0e$eZHVa`l2qRioPRQdX{eQs@efq#bWEi{LKIF{t_jfM;u7EzYy} znKJ6Ilf52x$EWmv=(!)oVz&BpU#W2-O)YJ?qXvLtA~V l9TAO!IK-b{-%EMM56u_;Z^f^?PKAqhT3Wk!OZ&vc?0 Date: Fri, 1 Mar 2024 17:38:52 +0000 Subject: [PATCH 12/14] CA-389464: avoid race condition with page load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is some race condition between page loads and fetching the images: force new instance. Even though the code waits for certain page elements to appear and be in the correct state, apparently this only works on the first page. 
When loading the next page these elements will already be there and the page will be considered already loaded even though it hasn't fully finished loading yet. This is a "big hammer" approach, followup commits will use tabs instead of an entire new process. The effect of the race condition is that you see graphs from completely unrelated measurements shown in place of other graphs (which is a distinct bug from the "duplicate title causing overwrite" bug) Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 7a5dccb..5ca7486 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -12,12 +12,11 @@ def __init__(self, data, geckodriver_path, logger): self.data = data self.logger = logger - options = Options() - options.add_argument('-headless') + self.options = Options() + self.options.add_argument('-headless') logger.info(f"Using {geckodriver_path} for geckodriver") - serv = Service(geckodriver_path) - self.driver = Firefox(service=serv, options=options) + self.serv = Service(geckodriver_path) def get_graph_screenshot(self, url, driver): wait = WebDriverWait(driver, timeout=60) @@ -48,8 +47,9 @@ def get_graph_screenshot(self, url, driver): def fetch_images(self): for item in tqdm(self.data, desc='Fetching graphs...'): + driver = Firefox(service=self.serv, options=self.options) try: - item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], self.driver) + item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], driver) item['graph_exception'] = None except TimeoutException: self.logger.warn("Failed while fetching image for link "+item['graph_link']) @@ -60,6 +60,5 @@ def fetch_images(self): print(e) item['graph_exception'] = str(e) item['graph_bytes'] = None - - self.driver.quit() + driver.quit() return self.data From c974bdfca7f95b683da407304caa6b6c12a771af Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 1 Mar 2024 20:50:33 +0000 Subject: [PATCH 13/14] CA-389464: use tabs instead of completely new process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch to a new tab for each test, and then close it and switch back to the empty page. This ensures that every image fetch correctly waits for the entire page load, but without the overhead of spawning an entirely new browser process every time. Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 5ca7486..5b053f1 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -46,8 +46,11 @@ def get_graph_screenshot(self, url, driver): return canvas_bytes def fetch_images(self): + driver = Firefox(service=self.serv, options=self.options) + driver.get("about:blank") + orig = driver.current_window_handle for item in tqdm(self.data, desc='Fetching graphs...'): - driver = Firefox(service=self.serv, options=self.options) + driver.switch_to.new_window('tab') try: item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], driver) item['graph_exception'] = None @@ -60,5 +63,8 @@ def fetch_images(self): print(e) item['graph_exception'] = str(e) item['graph_bytes'] = None - driver.quit() + driver.close() + # have to switch back, otherwise we can't open new tabs anymore + driver.switch_to.window(orig) + driver.quit() return self.data From 79a69c77d0a09f4fd9b48d8937e39f725db0e018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 4 Mar 2024 09:51:41 +0000 Subject: [PATCH 14/14] driver: use context manager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid leaving browser processes behind on errors Signed-off-by: Edwin Török --- ragelib/image_fetcher.py | 41 ++++++++++++++++++++-------------------- 1 file changed, 20 
insertions(+), 21 deletions(-) diff --git a/ragelib/image_fetcher.py b/ragelib/image_fetcher.py index 5b053f1..44208a0 100644 --- a/ragelib/image_fetcher.py +++ b/ragelib/image_fetcher.py @@ -46,25 +46,24 @@ def get_graph_screenshot(self, url, driver): return canvas_bytes def fetch_images(self): - driver = Firefox(service=self.serv, options=self.options) - driver.get("about:blank") - orig = driver.current_window_handle - for item in tqdm(self.data, desc='Fetching graphs...'): - driver.switch_to.new_window('tab') - try: - item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], driver) - item['graph_exception'] = None - except TimeoutException: - self.logger.warn("Failed while fetching image for link "+item['graph_link']) - item['graph_exception'] = "TimeoutException" - item['graph_bytes'] = None - except Exception as e: - self.logger.warn("Failed while fetching image for link "+item['graph_link']) - print(e) - item['graph_exception'] = str(e) - item['graph_bytes'] = None - driver.close() - # have to switch back, otherwise we can't open new tabs anymore - driver.switch_to.window(orig) - driver.quit() + with Firefox(service=self.serv, options=self.options) as driver: + driver.get("about:blank") + orig = driver.current_window_handle + for item in tqdm(self.data, desc='Fetching graphs...'): + driver.switch_to.new_window('tab') + try: + item['graph_bytes'] = self.get_graph_screenshot(item['graph_link'], driver) + item['graph_exception'] = None + except TimeoutException: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + item['graph_exception'] = "TimeoutException" + item['graph_bytes'] = None + except Exception as e: + self.logger.warn("Failed while fetching image for link "+item['graph_link']) + print(e) + item['graph_exception'] = str(e) + item['graph_bytes'] = None + driver.close() + # have to switch back, otherwise we can't open new tabs anymore + driver.switch_to.window(orig) return self.data