fd33cad4cf | ||
---|---|---|
.. | ||
ReadMe.md | ||
__init__.py | ||
raw_antibot.py | ||
raw_async.py | ||
raw_bestwestern.py | ||
raw_cdp.py | ||
raw_cdp_with_sb.py | ||
raw_easyjet.py | ||
raw_footlocker.py | ||
raw_gitlab.py | ||
raw_hyatt.py | ||
raw_pokemon.py | ||
raw_priceline.py | ||
raw_req_async.py | ||
raw_req_sb.py | ||
raw_res_sb.py | ||
raw_walmart.py |
ReadMe.md
CDP Mode 🐙
🐙 SeleniumBase CDP Mode (Chrome DevTools Protocol Mode) is a special mode inside of SeleniumBase UC Mode that lets bots appear human while controlling the browser with the CDP-Driver. Although regular UC Mode can't perform WebDriver actions while the driver is disconnected from the browser, the CDP-Driver can still perform actions while maintaining its cover. (For Python 3.11 or newer!)
👤 UC Mode avoids bot-detection by first disconnecting WebDriver from the browser at strategic times, calling special `PyAutoGUI` methods to bypass CAPTCHAs (as needed), and finally reconnecting the `driver` afterwards so that WebDriver actions can be performed again. Although this approach works for bypassing simple CAPTCHAs, more flexibility is needed for bypassing bot-detection on websites with advanced protection. (That's where CDP Mode comes in.)
🐙 CDP Mode is based on `python-cdp`, `trio-cdp`, and `nodriver`. `trio-cdp` is an early implementation of `python-cdp`, and `nodriver` is a modern implementation of `python-cdp`. (Refactored Python-CDP code is imported from MyCDP.)
🐙 CDP Mode includes multiple updates to the above, such as:
- Sync methods. (Using `async`/`await` is not necessary!)
- The ability to use WebDriver and CDP-Driver together.
- Backwards compatibility for existing UC Mode scripts.
- More configuration options when launching browsers.
- More methods. (And bug-fixes for existing methods.)
- Faster response time for support. (E.g., Discord Chat)
🐙 CDP Mode usage:
sb.activate_cdp_mode(url)
(Call that from a UC Mode script)
That disconnects WebDriver from Chrome (which prevents detection), and gives you access to `sb.cdp` methods (which don't trigger anti-bot checks).
🐙 Here are some common `sb.cdp` methods:
sb.cdp.click(selector)
sb.cdp.click_if_visible(selector)
sb.cdp.type(selector, text)
sb.cdp.press_keys(selector, text)
sb.cdp.select_all(selector)
sb.cdp.get_text(selector)
When `type()` is too fast, use the slower `press_keys()` to avoid detection. You can also use `sb.sleep(seconds)` to slow things down.
To use WebDriver methods again, call:
`sb.reconnect()` or `sb.connect()`
(Note that reconnecting allows anti-bots to detect you, so only reconnect if it is safe to do so.)
To disconnect again, call:
sb.disconnect()
While disconnected, if you accidentally call a WebDriver method, then SeleniumBase will attempt to use the CDP Mode version of that method (if available). For example, if you accidentally call `sb.click(selector)` instead of `sb.cdp.click(selector)`, then your WebDriver call will automatically be redirected to the CDP Mode version. Not all WebDriver methods have a matching CDP Mode method. In that scenario, calling a WebDriver method while disconnected could raise an error, or make WebDriver automatically reconnect first.
To find out if WebDriver is connected or disconnected, call:
sb.is_connected()
🐙 CDP Mode examples:
🔖 Example 1: (Pokemon site using Incapsula/Imperva protection with invisible reCAPTCHA)
▶️ (Click to expand code preview)
from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.pokemon.com/us"
sb.activate_cdp_mode(url)
sb.sleep(3)
sb.cdp.click_if_visible("button#onetrust-reject-all-handler")
sb.sleep(1)
sb.cdp.click('a[href="https://www.pokemon.com/us/pokedex/"]')
sb.sleep(1)
sb.cdp.click('b:contains("Show Advanced Search")')
sb.sleep(1)
sb.cdp.click('span[data-type="type"][data-value="electric"]')
sb.sleep(1)
sb.cdp.click("a#advSearch")
sb.sleep(1)
sb.cdp.click('img[src*="img/pokedex/detail/025.png"]')
sb.cdp.assert_text("Pikachu", 'div[class*="title"]')
sb.cdp.assert_element('img[alt="Pikachu"]')
sb.cdp.scroll_into_view("div.pokemon-ability-info")
sb.sleep(1)
sb.cdp.flash('div[class*="title"]')
sb.cdp.flash('img[alt="Pikachu"]')
sb.cdp.flash("div.pokemon-ability-info")
name = sb.cdp.get_text("label.styled-select")
info = sb.cdp.get_text("div.version-descriptions p.active")
print("*** %s: ***\n* %s" % (name, info))
sb.sleep(2)
sb.cdp.highlight_overlay("div.pokemon-ability-info")
sb.sleep(2)
sb.cdp.click('a[href="https://www.pokemon.com/us/play-pokemon/"]')
sb.cdp.click('h3:contains("Find an Event")')
location = "Concord, MA, USA"
sb.cdp.type('input[data-testid="location-search"]', location)
sb.sleep(1)
sb.cdp.click("div.autocomplete-dropdown-container div.suggestion-item")
sb.cdp.click('img[alt="search-icon"]')
sb.sleep(2)
events = sb.cdp.select_all('div[data-testid="event-name"]')
print("*** Pokemon events near %s: ***" % location)
for event in events:
print("* " + event.text)
sb.sleep(2)
🔖 Example 2: (Hyatt site using Kasada protection)
▶️ (Click to expand code preview)
from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.hyatt.com/"
sb.activate_cdp_mode(url)
sb.sleep(2)
sb.cdp.click_if_visible('button[aria-label="Close"]')
sb.sleep(1)
sb.cdp.click('span:contains("Explore")')
sb.sleep(1)
sb.cdp.click('a:contains("Hotels & Resorts")')
sb.sleep(3)
location = "Anaheim, CA, USA"
sb.cdp.press_keys("input#searchbox", location)
sb.sleep(1)
sb.cdp.click("div#suggestion-list ul li a")
sb.sleep(1)
sb.cdp.click('div.hotel-card-footer button')
sb.sleep(1)
sb.cdp.click('button[data-locator="find-hotels"]')
sb.sleep(5)
hotel_names = sb.cdp.select_all(
'div[data-booking-status="BOOKABLE"] [class*="HotelCard_header"]'
)
hotel_prices = sb.cdp.select_all(
'div[data-booking-status="BOOKABLE"] div.rate-currency'
)
sb.assert_true(len(hotel_names) == len(hotel_prices))
print("Hyatt Hotels in %s:" % location)
print("(" + sb.cdp.get_text("ul.b-color_text-white") + ")")
if len(hotel_names) == 0:
print("No availability over the selected dates!")
for i, hotel in enumerate(hotel_names):
print("* %s: %s => %s" % (i + 1, hotel.text, hotel_prices[i].text))
🔖 Example 3: (BestWestern site using DataDome protection)
▶️ (Click to expand code preview)
from seleniumbase import SB
with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://www.bestwestern.com/en_US.html"
sb.activate_cdp_mode(url)
sb.sleep(2.5)
sb.cdp.click_if_visible("div.onetrust-close-btn-handler")
sb.sleep(1)
sb.cdp.click("input#destination-input")
sb.sleep(2)
location = "Palm Springs, CA, USA"
sb.cdp.press_keys("input#destination-input", location)
sb.sleep(1)
sb.cdp.click("ul#google-suggestions li")
sb.sleep(1)
sb.cdp.click("button#btn-modify-stay-update")
sb.sleep(4)
sb.cdp.click("label#available-label")
sb.sleep(2.5)
print("Best Western Hotels in %s:" % location)
summary_details = sb.cdp.get_text("#summary-details-column")
dates = summary_details.split("ROOM")[0].split("DATES")[-1].strip()
print("(Dates: %s)" % dates)
flip_cards = sb.cdp.select_all(".flipCard")
for i, flip_card in enumerate(flip_cards):
hotel = flip_card.query_selector(".hotelName")
price = flip_card.query_selector(".priceSection")
if hotel and price:
print("* %s: %s => %s" % (
i + 1, hotel.text.strip(), price.text.strip())
)
(Note: Extra `sb.sleep()` calls have been added to prevent bot-detection because some sites will flag you as a bot if you perform actions too quickly.)
(Note: Some sites may IP-block you for 36 hours or more if they catch you using regular Selenium WebDriver. Be extra careful when creating and/or modifying automation scripts that run on them.)
🐙 CDP Mode API / Methods
(Some method args have been left out for simplicity. E.g., `timeout`)
sb.cdp.get(url)
sb.cdp.open(url)
sb.cdp.reload(ignore_cache=True, script_to_evaluate_on_load=None)
sb.cdp.refresh()
sb.cdp.get_event_loop()
sb.cdp.add_handler(event, handler)
sb.cdp.find_element(selector)
sb.cdp.find(selector)
sb.cdp.locator(selector)
sb.cdp.find_all(selector)
sb.cdp.find_elements_by_text(text, tag_name=None)
sb.cdp.select(selector)
sb.cdp.select_all(selector)
sb.cdp.find_elements(selector)
sb.cdp.click_link(link_text)
sb.cdp.tile_windows(windows=None, max_columns=0)
sb.cdp.get_all_cookies(*args, **kwargs)
sb.cdp.set_all_cookies(*args, **kwargs)
sb.cdp.save_cookies(*args, **kwargs)
sb.cdp.load_cookies(*args, **kwargs)
sb.cdp.clear_cookies(*args, **kwargs)
sb.cdp.sleep(seconds)
sb.cdp.bring_active_window_to_front()
sb.cdp.bring_to_front()
sb.cdp.get_active_element()
sb.cdp.get_active_element_css()
sb.cdp.click(selector)
sb.cdp.click_active_element()
sb.cdp.click_if_visible(selector)
sb.cdp.mouse_click(selector)
sb.cdp.nested_click(parent_selector, selector)
sb.cdp.get_nested_element(parent_selector, selector)
sb.cdp.flash(selector)
sb.cdp.focus(selector)
sb.cdp.highlight_overlay(selector)
sb.cdp.remove_element(selector)
sb.cdp.remove_from_dom(selector)
sb.cdp.remove_elements(selector)
sb.cdp.scroll_into_view(selector)
sb.cdp.send_keys(selector, text)
sb.cdp.press_keys(selector, text)
sb.cdp.type(selector, text)
sb.cdp.evaluate(expression)
sb.cdp.js_dumps(obj_name)
sb.cdp.maximize()
sb.cdp.minimize()
sb.cdp.medimize()
sb.cdp.set_window_rect()
sb.cdp.reset_window_size()
sb.cdp.get_window()
sb.cdp.get_text(selector)
sb.cdp.get_title()
sb.cdp.get_current_url()
sb.cdp.get_origin()
sb.cdp.get_page_source()
sb.cdp.get_user_agent()
sb.cdp.get_cookie_string()
sb.cdp.get_locale_code()
sb.cdp.get_screen_rect()
sb.cdp.get_window_rect()
sb.cdp.get_window_size()
sb.cdp.get_window_position()
sb.cdp.get_element_rect(selector)
sb.cdp.get_element_size(selector)
sb.cdp.get_element_position(selector)
sb.cdp.get_gui_element_rect(selector)
sb.cdp.get_gui_element_center(selector)
sb.cdp.get_document()
sb.cdp.get_flattened_document()
sb.cdp.get_element_attributes(selector)
sb.cdp.get_element_html(selector)
sb.cdp.set_locale(locale)
sb.cdp.set_attributes(selector, attribute, value)
sb.cdp.gui_click_x_y(x, y)
sb.cdp.gui_click_element(selector)
sb.cdp.internalize_links()
sb.cdp.is_element_present(selector)
sb.cdp.is_element_visible(selector)
sb.cdp.assert_element(selector)
sb.cdp.assert_element_present(selector)
sb.cdp.assert_text(text, selector="html")
sb.cdp.assert_exact_text(text, selector="html")
sb.cdp.scroll_down(amount=25)
sb.cdp.scroll_up(amount=25)
sb.cdp.save_screenshot(name, folder=None, selector=None)
🐙 CDP Mode WebElement API / Methods
element.clear_input()
element.click()
element.flash()
element.focus()
element.highlight_overlay()
element.mouse_click()
element.mouse_drag(destination)
element.mouse_move()
element.query_selector(selector)
element.querySelector(selector)
element.query_selector_all(selector)
element.querySelectorAll(selector)
element.remove_from_dom()
element.save_screenshot(*args, **kwargs)
element.save_to_dom()
element.scroll_into_view()
element.select_option()
element.send_file(*file_paths)
element.send_keys(text)
element.set_text(value)
element.type(text)
element.get_position()
element.get_html()
element.get_js_attributes()