From 5872a660d619e6312d6a592306e5e6fb858a3356 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Mon, 8 Jul 2024 16:57:22 -0400 Subject: [PATCH 1/4] Update UC Mode --- help_docs/method_summary.md | 6 +- help_docs/uc_mode.md | 12 +- seleniumbase/core/browser_launcher.py | 234 +++++++++++++++++++++----- seleniumbase/fixtures/base_case.py | 4 + 4 files changed, 215 insertions(+), 41 deletions(-) diff --git a/help_docs/method_summary.md b/help_docs/method_summary.md index 557d913b..c6dd040e 100644 --- a/help_docs/method_summary.md +++ b/help_docs/method_summary.md @@ -1076,7 +1076,11 @@ driver.uc_gui_write(text) # Similar to uc_gui_press_keys(), but faster driver.uc_gui_click_x_y(x, y, timeframe=0.25) # PyAutoGUI click screen -driver.uc_gui_click_cf(frame="iframe", retry=False, blind=False) # (*) +driver.uc_gui_click_captcha(frame="iframe", retry=False, blind=False) + +driver.uc_gui_click_rc(frame="iframe", retry=False, blind=False) # reC + +driver.uc_gui_click_cf(frame="iframe", retry=False, blind=False) # CFT driver.uc_gui_handle_cf(frame="iframe") # PyAutoGUI click CF Turnstile diff --git a/help_docs/uc_mode.md b/help_docs/uc_mode.md index e6155d0c..2eb56095 100644 --- a/help_docs/uc_mode.md +++ b/help_docs/uc_mode.md @@ -68,7 +68,7 @@ with SB(uc=True, test=True) as sb: from seleniumbase import SB with SB(uc=True, test=True) as sb: - url = "seleniumbase.io/apps/turnstile" + url = "https://seleniumbase.io/apps/turnstile" sb.uc_open_with_reconnect(url, reconnect_time=2) sb.uc_gui_handle_cf() sb.assert_element("img#captcha-success", timeout=3) @@ -78,7 +78,7 @@ with SB(uc=True, test=True) as sb: -If running on a Linux server, `uc_gui_handle_cf()` might not be good enough. Switch to `uc_gui_click_cf()` to be more stealthy. +If running on a Linux server, `uc_gui_handle_cf()` might not be good enough. Switch to `uc_gui_click_cf()` to be more stealthy. You can also use `uc_gui_click_captcha()` as a generic CAPTCHA-clicker, which auto-detects between CF Turnstile and reCAPTCHA. 👤 Here's an example where the CAPTCHA appears after submitting a form: @@ -192,6 +192,10 @@ driver.uc_gui_write(text) driver.uc_gui_click_x_y(x, y, timeframe=0.25) +driver.uc_gui_click_captcha(frame="iframe", retry=False, blind=False) + +driver.uc_gui_click_rc(frame="iframe", retry=False, blind=False) + driver.uc_gui_click_cf(frame="iframe", retry=False, blind=False) driver.uc_gui_handle_cf(frame="iframe") @@ -235,6 +239,10 @@ driver.reconnect("breakpoint") 👤 `driver.uc_gui_click_cf(frame="iframe", retry=False, blind=False)` has three args. (All optional). The first one, `frame`, lets you specify the iframe in case the CAPTCHA is not located in the first iframe on the page. The second one, `retry`, lets you retry the click after reloading the page if the first one didn't work (and a CAPTCHA is still present after the page reload). The third arg, `blind`, will retry after a page reload (if the first click failed) by clicking at the last known coordinates of the CAPTCHA checkbox without confirming first with Selenium that a CAPTCHA is still on the page. +👤 `driver.uc_gui_click_rc(frame="iframe", retry=False, blind=False)` is for reCAPTCHA. This may only work a few times before not working anymore... not because Selenium was detected, but because reCAPTCHA uses advanced AI to detect unusual activity, unlike the CF Turnstile, which only uses basic detection. + +👤 `driver.uc_gui_click_captcha()` auto-detects the CAPTCHA type before trying to click it. This is a generic method for both CF Turnstile and Google reCAPTCHA. It will use the code from `uc_gui_click_cf()` and `uc_gui_click_rc()` as needed. + 👤 To find out if UC Mode will work at all on a specific site (before adjusting for timing), load your site with the following script: ```python diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index 687e5f0e..5e19abbf 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -660,7 +660,7 @@ def get_gui_element_position(driver, selector): return (viewport_x, viewport_y) -def uc_gui_click_x_y(driver, x, y, timeframe=0.25, uc_lock=True): +def _uc_gui_click_x_y(driver, x, y, timeframe=0.25, uc_lock=False): install_pyautogui_if_missing(driver) import pyautogui pyautogui = get_configured_pyautogui(pyautogui) @@ -678,7 +678,7 @@ def uc_gui_click_x_y(driver, x, y, timeframe=0.25, uc_lock=True): with gui_lock: # Prevent issues with multiple processes pyautogui.moveTo(x, y, timeframe, pyautogui.easeOutQuad) if timeframe >= 0.25: - time.sleep(0.0555) # Wait if moving at human-speed + time.sleep(0.056) # Wait if moving at human-speed if "--debug" in sys.argv: print(" pyautogui.click(%s, %s)" % (x, y)) pyautogui.click(x=x, y=y) @@ -686,30 +686,71 @@ def uc_gui_click_x_y(driver, x, y, timeframe=0.25, uc_lock=True): # Called from a method where the gui_lock is already active pyautogui.moveTo(x, y, timeframe, pyautogui.easeOutQuad) if timeframe >= 0.25: - time.sleep(0.0555) # Wait if moving at human-speed + time.sleep(0.056) # Wait if moving at human-speed if "--debug" in sys.argv: print(" pyautogui.click(%s, %s)" % (x, y)) pyautogui.click(x=x, y=y) -def on_a_cf_turnstile_page(driver): +def uc_gui_click_x_y(driver, x, y, timeframe=0.25): + _uc_gui_click_x_y(driver, x, y, timeframe=timeframe, uc_lock=True) + + +def _on_a_cf_turnstile_page(driver): source = driver.get_page_source() if ( - "//challenges.cloudflare.com" in source - or 'aria-label="Cloudflare"' in source + 'data-callback="onCaptchaSuccess"' in source + or "cf-turnstile-wrapper" in source ): return True return False -def uc_gui_click_cf(driver, frame="iframe", retry=False, blind=False): - if not on_a_cf_turnstile_page(driver): - return +def _on_a_g_recaptcha_page(driver): + source = driver.get_page_source() + if ( + 'id="recaptcha-token"' in source + or 'title="reCAPTCHA"' in source + ): + return True + return False + + +def _uc_gui_click_captcha( + driver, + frame="iframe", + retry=False, + blind=False, + ctype=None, +): + _on_a_captcha_page = None + if ctype == "cf_t": + if not _on_a_cf_turnstile_page(driver): + return + else: + _on_a_captcha_page = _on_a_cf_turnstile_page + elif ctype == "g_rc": + if not _on_a_g_recaptcha_page(driver): + return + else: + _on_a_captcha_page = _on_a_g_recaptcha_page + else: + if _on_a_g_recaptcha_page(driver): + ctype = "g_rc" + _on_a_captcha_page = _on_a_g_recaptcha_page + elif _on_a_cf_turnstile_page(driver): + ctype = "cf_t" + _on_a_captcha_page = _on_a_cf_turnstile_page + else: + return install_pyautogui_if_missing(driver) import pyautogui pyautogui = get_configured_pyautogui(pyautogui) + i_x = None + i_y = None x = None y = None + visible_iframe = True gui_lock = fasteners.InterProcessLock( constants.MultiBrowser.PYAUTOGUILOCK ) @@ -725,22 +766,54 @@ def uc_gui_click_cf(driver, frame="iframe", retry=False, blind=False): page_actions.switch_to_window( driver, driver.current_window_handle, 2, uc_lock=False ) + if ctype == "cf_t": + if ( + driver.is_element_present(".cf-turnstile-wrapper iframe") + or driver.is_element_present( + '[data-callback="onCaptchaSuccess"] iframe' + ) + ): + pass + else: + visible_iframe = False + if driver.is_element_present(".cf-turnstile-wrapper"): + frame = ".cf-turnstile-wrapper" + elif driver.is_element_present( + '[data-callback="onCaptchaSuccess"]' + ): + frame = '[data-callback="onCaptchaSuccess"]' + else: + return if not is_in_frame or needs_switch: # Currently not in frame (or nested frame outside CF one) try: - i_x, i_y = get_gui_element_position(driver, "iframe") - driver.switch_to_frame(frame) + i_x, i_y = get_gui_element_position(driver, frame) + if visible_iframe: + driver.switch_to_frame(frame) except Exception: - if driver.is_element_present("iframe"): - i_x, i_y = get_gui_element_position(driver, "iframe") - driver.switch_to_frame("iframe") - else: - return + if visible_iframe: + if driver.is_element_present("iframe"): + i_x, i_y = get_gui_element_position(driver, "iframe") + driver.switch_to_frame("iframe") + else: + return + if not i_x or not i_y: + return try: - selector = "span" - element = driver.wait_for_element_present(selector, timeout=2.5) - x = i_x + element.rect["x"] + int(element.rect["width"] / 2) + 1 - y = i_y + element.rect["y"] + int(element.rect["height"] / 2) + 1 + if visible_iframe: + selector = "span" + if ctype == "g_rc": + selector = "span.recaptcha-checkbox" + element = driver.wait_for_element_present( + selector, timeout=2.5 + ) + x = i_x + element.rect["x"] + int(element.rect["width"] / 2) + x += 1 + y = i_y + element.rect["y"] + int(element.rect["height"] / 2) + y += 1 + else: + x = i_x + 34 + y = i_y + 34 driver.switch_to.default_content() except Exception: try: @@ -751,46 +824,91 @@ def uc_gui_click_cf(driver, frame="iframe", retry=False, blind=False): try: if x and y: sb_config._saved_cf_x_y = (x, y) - uc_gui_click_x_y(driver, x, y, timeframe=0.842, uc_lock=False) + _uc_gui_click_x_y(driver, x, y, timeframe=0.95) except Exception: pass reconnect_time = (float(constants.UC.RECONNECT_TIME) / 2.0) + 0.5 if IS_LINUX: - reconnect_time = constants.UC.RECONNECT_TIME + reconnect_time = constants.UC.RECONNECT_TIME + 0.15 if not x or not y: reconnect_time = 1 # Make it quick (it already failed) driver.reconnect(reconnect_time) if blind: retry = True - if retry and x and y and on_a_cf_turnstile_page(driver): + if retry and x and y and _on_a_captcha_page(driver): with gui_lock: # Prevent issues with multiple processes # Make sure the window is on top page_actions.switch_to_window( driver, driver.current_window_handle, 2, uc_lock=False ) - driver.switch_to_frame("iframe") - if driver.is_element_visible("#success-icon"): - driver.switch_to.parent_frame() + if not driver.is_element_present("iframe"): return + else: + try: + driver.switch_to_frame(frame) + except Exception: + try: + driver.switch_to_frame("iframe") + except Exception: + return + checkbox_success = None + if ctype == "cf_t": + checkbox_success = "#success-icon" + elif ctype == "g_rc": + checkbox_success = "span.recaptcha-checkbox-checked" + else: + return # If this line is reached, ctype wasn't set + if driver.is_element_visible("#success-icon"): + driver.switch_to.parent_frame(checkbox_success) + return if blind: driver.uc_open_with_disconnect(driver.current_url, 3.8) - uc_gui_click_x_y(driver, x, y, timeframe=1.05, uc_lock=False) + _uc_gui_click_x_y(driver, x, y, timeframe=1.05) else: driver.uc_open_with_reconnect(driver.current_url, 3.8) - if on_a_cf_turnstile_page(driver): + if _on_a_captcha_page(driver): driver.disconnect() - uc_gui_click_x_y( - driver, x, y, timeframe=1.05, uc_lock=False - ) + _uc_gui_click_x_y(driver, x, y, timeframe=1.05) driver.reconnect(reconnect_time) +def uc_gui_click_captcha(driver, frame="iframe", retry=False, blind=False): + _uc_gui_click_captcha( + driver, + frame=frame, + retry=retry, + blind=blind, + ctype=None, + ) + + +def uc_gui_click_rc(driver, frame="iframe", retry=False, blind=False): + _uc_gui_click_captcha( + driver, + frame=frame, + retry=retry, + blind=blind, + ctype="g_rc", + ) + + +def uc_gui_click_cf(driver, frame="iframe", retry=False, blind=False): + _uc_gui_click_captcha( + driver, + frame=frame, + retry=retry, + blind=blind, + ctype="cf_t", + ) + + def uc_gui_handle_cf(driver, frame="iframe"): - if not on_a_cf_turnstile_page(driver): + if not _on_a_cf_turnstile_page(driver): return install_pyautogui_if_missing(driver) import pyautogui pyautogui = get_configured_pyautogui(pyautogui) + visible_iframe = True gui_lock = fasteners.InterProcessLock( constants.MultiBrowser.PYAUTOGUILOCK ) @@ -806,16 +924,46 @@ def uc_gui_handle_cf(driver, frame="iframe"): page_actions.switch_to_window( driver, driver.current_window_handle, 2, uc_lock=False ) + if ( + driver.is_element_present(".cf-turnstile-wrapper iframe") + or driver.is_element_present( + '[data-callback="onCaptchaSuccess"] iframe' + ) + ): + pass + else: + visible_iframe = False + if driver.is_element_present(".cf-turnstile-wrapper"): + frame = ".cf-turnstile-wrapper" + elif driver.is_element_present( + '[data-callback="onCaptchaSuccess"]' + ): + frame = '[data-callback="onCaptchaSuccess"]' + else: + return if not is_in_frame or needs_switch: # Currently not in frame (or nested frame outside CF one) try: - driver.switch_to_frame(frame) + if visible_iframe: + driver.switch_to_frame(frame) except Exception: - if driver.is_element_present("iframe"): - driver.switch_to_frame("iframe") - else: - return + if visible_iframe: + if driver.is_element_present("iframe"): + driver.switch_to_frame("iframe") + else: + return try: + found_checkbox = False + for i in range(10): + pyautogui.press("\t") + time.sleep(0.02) + active_element_css = js_utils.get_active_element_css(driver) + if active_element_css == "div.cf-turnstile-wrapper": + found_checkbox = True + break + time.sleep(0.02) + if not found_checkbox: + return driver.execute_script('document.querySelector("input").focus()') except Exception: try: @@ -829,7 +977,7 @@ def uc_gui_handle_cf(driver, frame="iframe"): pass reconnect_time = (float(constants.UC.RECONNECT_TIME) / 2.0) + 0.5 if IS_LINUX: - reconnect_time = constants.UC.RECONNECT_TIME + reconnect_time = constants.UC.RECONNECT_TIME + 0.15 driver.reconnect(reconnect_time) @@ -4166,6 +4314,16 @@ def get_local_driver( driver, *args, **kwargs ) ) + driver.uc_gui_click_captcha = ( + lambda *args, **kwargs: uc_gui_click_captcha( + driver, *args, **kwargs + ) + ) + driver.uc_gui_click_rc = ( + lambda *args, **kwargs: uc_gui_click_rc( + driver, *args, **kwargs + ) + ) driver.uc_gui_click_cf = ( lambda *args, **kwargs: uc_gui_click_cf( driver, *args, **kwargs diff --git a/seleniumbase/fixtures/base_case.py b/seleniumbase/fixtures/base_case.py index 1ebae913..fbbaa711 100644 --- a/seleniumbase/fixtures/base_case.py +++ b/seleniumbase/fixtures/base_case.py @@ -4242,6 +4242,10 @@ class BaseCase(unittest.TestCase): self.uc_gui_write = new_driver.uc_gui_write if hasattr(new_driver, "uc_gui_click_x_y"): self.uc_gui_click_x_y = new_driver.uc_gui_click_x_y + if hasattr(new_driver, "uc_gui_click_captcha"): + self.uc_gui_click_captcha = new_driver.uc_gui_click_captcha + if hasattr(new_driver, "uc_gui_click_rc"): + self.uc_gui_click_rc = new_driver.uc_gui_click_rc if hasattr(new_driver, "uc_gui_click_cf"): self.uc_gui_click_cf = new_driver.uc_gui_click_cf if hasattr(new_driver, "uc_gui_handle_cf"): From 65c15ab1e5202d813e57da2a3f5dce7620489926 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Mon, 8 Jul 2024 16:58:44 -0400 Subject: [PATCH 2/4] Update examples --- examples/raw_recaptcha.py | 9 +++++++++ examples/raw_turnstile.py | 6 +++--- examples/raw_uc_mode.py | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 examples/raw_recaptcha.py diff --git a/examples/raw_recaptcha.py b/examples/raw_recaptcha.py new file mode 100644 index 00000000..c348bfdc --- /dev/null +++ b/examples/raw_recaptcha.py @@ -0,0 +1,9 @@ +from seleniumbase import SB + +with SB(uc=True, test=True) as sb: + url = "https://seleniumbase.io/apps/recaptcha" + sb.uc_open_with_reconnect(url) + sb.uc_gui_click_captcha() + sb.assert_element("img#captcha-success", timeout=3) + sb.set_messenger_theme(location="top_left") + sb.post_message("SeleniumBase wasn't detected", duration=3) diff --git a/examples/raw_turnstile.py b/examples/raw_turnstile.py index 3f45b83f..9b907ae4 100644 --- a/examples/raw_turnstile.py +++ b/examples/raw_turnstile.py @@ -1,9 +1,9 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: - url = "seleniumbase.io/apps/turnstile" - sb.uc_open_with_reconnect(url, reconnect_time=2) - sb.uc_gui_handle_cf() + url = "https://seleniumbase.io/apps/turnstile" + sb.uc_open_with_reconnect(url) + sb.uc_gui_click_captcha() sb.assert_element("img#captcha-success", timeout=3) sb.set_messenger_theme(location="top_left") sb.post_message("SeleniumBase wasn't detected", duration=3) diff --git a/examples/raw_uc_mode.py b/examples/raw_uc_mode.py index c0cd2574..90dee460 100644 --- a/examples/raw_uc_mode.py +++ b/examples/raw_uc_mode.py @@ -4,7 +4,7 @@ from seleniumbase import SB with SB(uc=True, test=True) as sb: url = "https://gitlab.com/users/sign_in" sb.uc_open_with_reconnect(url, 4) - sb.uc_gui_click_cf() + sb.uc_gui_click_captcha() sb.assert_text("Username", '[for="user_login"]', timeout=3) sb.assert_element('label[for="user_login"]') sb.highlight('button:contains("Sign in")') From 719b6b44a777611e878c2fef657dc4e238594c08 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Mon, 8 Jul 2024 16:59:13 -0400 Subject: [PATCH 3/4] Refresh Python dependencies --- requirements.txt | 6 +++--- setup.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index f9e49ca3..4e994979 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pip>=24.0;python_version<"3.8" -pip>=24.1.1;python_version>="3.8" +pip>=24.1.2;python_version>="3.8" packaging>=24.0;python_version<"3.8" packaging>=24.1;python_version>="3.8" setuptools>=68.0.0;python_version<"3.8" @@ -21,8 +21,8 @@ six==1.16.0 idna==3.7 chardet==5.2.0 charset-normalizer==3.3.2 -urllib3>=1.26.18,<2;python_version<"3.10" -urllib3>=1.26.18,<2.3.0;python_version>="3.10" +urllib3>=1.26.19,<2;python_version<"3.10" +urllib3>=1.26.19,<2.3.0;python_version>="3.10" requests==2.31.0 pynose==1.5.1 sniffio==1.3.1 diff --git a/setup.py b/setup.py index df3621f2..12badf83 100755 --- a/setup.py +++ b/setup.py @@ -147,7 +147,7 @@ setup( python_requires=">=3.7", install_requires=[ 'pip>=24.0;python_version<"3.8"', - 'pip>=24.1.1;python_version>="3.8"', + 'pip>=24.1.2;python_version>="3.8"', 'packaging>=24.0;python_version<"3.8"', 'packaging>=24.1;python_version>="3.8"', 'setuptools>=68.0.0;python_version<"3.8"', @@ -169,8 +169,8 @@ setup( "idna==3.7", 'chardet==5.2.0', 'charset-normalizer==3.3.2', - 'urllib3>=1.26.18,<2;python_version<"3.10"', - 'urllib3>=1.26.18,<2.3.0;python_version>="3.10"', + 'urllib3>=1.26.19,<2;python_version<"3.10"', + 'urllib3>=1.26.19,<2.3.0;python_version>="3.10"', 'requests==2.31.0', "pynose==1.5.1", 'sniffio==1.3.1', @@ -259,7 +259,7 @@ setup( # (An optional library for parsing PDF files.) "pdfminer": [ 'pdfminer.six==20221105;python_version<"3.8"', - 'pdfminer.six==20231228;python_version>="3.8"', + 'pdfminer.six==20240706;python_version>="3.8"', 'cryptography==39.0.2;python_version<"3.9"', 'cryptography==42.0.8;python_version>="3.9"', 'cffi==1.15.1;python_version<"3.8"', @@ -286,7 +286,7 @@ setup( ], # pip install -e .[psutil] "psutil": [ - "psutil==5.9.8", + "psutil==6.0.0", ], # pip install -e .[pyautogui] "pyautogui": [ From 59602bd1cbfd1f3015bb5a9e6e3abadf6a0269d9 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Mon, 8 Jul 2024 16:59:38 -0400 Subject: [PATCH 4/4] Version 4.28.5 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index e965f8ed..6268d11b 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.28.4" +__version__ = "4.28.5"