diff --git a/docs/_data/sidebars/home_sidebar.yml b/docs/_data/sidebars/home_sidebar.yml index 7bf1735..fd97af4 100644 --- a/docs/_data/sidebars/home_sidebar.yml +++ b/docs/_data/sidebars/home_sidebar.yml @@ -30,6 +30,9 @@ entries: - output: web,pdf title: DuckDuckGo Search API url: tools_duckduckgo.html + - output: web,pdf + title: Annotating Kanji with Hiragana + url: tools_pyknp.html output: web title: ipymock output: web diff --git a/docs/automation.html b/docs/automation.html index 1809bca..a79a671 100644 --- a/docs/automation.html +++ b/docs/automation.html @@ -8,83 +8,103 @@ summary: "Using Selenium for Browser Automation" description: "Using Selenium for Browser Automation" -nb_path: "nbs\2_automation.ipynb" +nb_path: "nbs/2_automation.ipynb" --- +
{% raw %}
+
{% endraw %}
-
pip install undetected_chromedriver webdriver_manager
+
pip install undetected_chromedriver webdriver_manager
 
+
{% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

init[source]

-
-

init(*arguments)

+ + + +
+

init[source]

init(*arguments)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

quit[source]

-
-

quit()

+ + + +
+

quit[source]

quit()

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} @@ -92,19 +112,30 @@

quit
+
-
-
# def init(*arguments):
+    
+
# from selenium.webdriver.chrome.service import Service
+# from webdriver_manager.chrome import ChromeDriverManager
+
+# def init(*arguments):
 #     chrome_options = webdriver.ChromeOptions()
 #     for argument in arguments:
 #         if isinstance(argument, str):
 #             chrome_options.add_argument(argument)
 #     global driver
-#     driver = webdriver.Chrome(options = chrome_options)
+#     driver = webdriver.Chrome(
+#         options = chrome_options,
+#         service = Service(ChromeDriverManager().install())
+#     )
+#     global device_pixel_ratio
+#     device_pixel_ratio = driver.execute_script('return window.devicePixelRatio;')
 
+ +
-
+
{% endraw %} @@ -112,656 +143,995 @@

quit
+
-
-
init('--user-data-dir=C:\\Users\\seii-saintway\\Downloads\\chrome-profile')
+    
+
init('--user-data-dir=C:\\Users\\seii-saintway\\Downloads\\chrome-profile')
 
+ +
-
+
{% endraw %}
-
+

Logging

+
-
+

Using Text-based Search for Browser Automation

+
{% raw %}
+
+
-
-

ok[source]

-
-

ok()

+ + + +
+

ok[source]

ok()

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

last[source]

-
-

last()

+ + + +
+

last[source]

last()

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

new[source]

-
-

new(url)

+ + + +
+

new[source]

new(url)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

close[source]

-
-

close()

+ + + +
+

close[source]

close()

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

find_elements[source]

-
-

find_elements(prompt, exactly=True)

+ + + +
+

find_elements[source]

find_elements(prompt, exactly=True)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

find_elements[source]

-
-

find_elements(prompt, exactly=True)

+ + + +
+

find_elements[source]

find_elements(prompt, exactly=True)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

find_element[source]

-
-

find_element(prompt, closest_prompt=None)

+ + + +
+

find_element[source]

find_element(prompt, closest_prompt=None)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

click[source]

-
-

click(prompt, closest_prompt=None)

+ + + +
+

click[source]

click(prompt=None, closest_prompt=None, xoffset:int=0, yoffset:int=0)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

input[source]

-
-

input(prompt, text, closest_prompt=None)

+ + + +
+

input[source]

input(text, prompt=None, closest_prompt=None, xoffset:int=0, yoffset:int=0)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %}
-
+

Using Selenium for Static Inspection of Page Appearance

+
{% raw %}
+
+
-
-

image_hash[source]

-
-

image_hash(image_path)

+ + + +
+

get_html_hash[source]

get_html_hash(xpath='//body')

-

Calculate the hash value of the image

+

Get the hash of the element's outerHTML.

+
+
+
+
{% endraw %} {% raw %}
+ +
+ {% endraw %} + + {% raw %} + +
+
+
-
-

screen_hash[source]

-
-

screen_hash()

+ + + +
+

wait[source]

wait(timeout=inf, stability_duration=1.0, check_interval=0.5, xpath='//body')

+

Wait until the HTML of the specified element does not change.

+

Args: + timeout: Maximum wait time for stabilization (seconds). + stability_duration: Duration for stabilization (seconds). + check_interval: Interval to check for changes (seconds). + xpath: XPATH of the element to monitor for HTML changes.

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

wait[source]

-
-

wait(timeout=1.7976931348623157e+308, stability_duration=1.0, check_interval=0.5)

+ + + +
+

screen_hash[source]

screen_hash()

+
+

Calculate the hash value of the screenshot.

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + + +
+

watch[source]

watch(timeout=1.7976931348623157e+308, stability_duration=1.0, check_interval=0.5)

Wait until the screenshot does not change.

Args: -timeout: how long to wait for stabilization (seconds) -stability_duration: duration for stabilization (seconds) -check_interval: check interval (seconds)

+ timeout: how long to wait for stabilization (seconds) + stability_duration: duration for stabilization (seconds) + check_interval: check interval (seconds)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %}
-
+

Using Airtest for Browser Automation

+
-
pip install airtest
+
pip install airtest
 
+
{% raw %}
+
+
-
-

find_position[source]

-
-

find_position(image, timeout=1.0, threshold=0.9, interval=0.5, intervalfunc=None)

+ + + +
+

try_log_screen[source]

try_log_screen(xpath=None)

+
+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + + +
+

find_position[source]

find_position(image, timeout=1.0, threshold=0.9, interval=0.5, intervalfunc=None)

Search for image template in the screen until timeout

Args: -path: image file path to be found in screenshot -timeout: time interval how long to look for the image template -threshold: default is None -interval: sleep interval before next attempt to find the image template -intervalfunc: function that is executed after unsuccessful attempt to find the image template

+ image: image file path to be found in screenshot + timeout: time interval how long to look for the image template + threshold: default is None + interval: sleep interval before next attempt to find the image template + intervalfunc: function that is executed after unsuccessful attempt to find the image template

Raises: -TargetNotFoundError: when image template is not found in screenshot

+ TargetNotFoundError: when image template is not found in screenshot

Returns: -TargetNotFoundError if image template not found, otherwise returns the position where the image template has -been found in screenshot

+ TargetNotFoundError if image template not found, otherwise returns the position where the image template has + been found in screenshot

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

inject[source]

-
-

inject()

+ + + +
+

inject[source]

inject()

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

get_mouse_position[source]

-
-

get_mouse_position()

+ + + +
+

get_mouse_position[source]

get_mouse_position()

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

move_to_center[source]

-
-

move_to_center()

+ + + +
+

move_to_center[source]

move_to_center()

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

move_and_click[source]

-
-

move_and_click(x, y)

+ + + +
+

move_and_click[source]

move_and_click(x, y, offset=True)

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

exists[source]

-
-

exists(image)

+ + + +
+

exists[source]

exists(image)

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

touch[source]

-
-

touch(image, text=None)

+ + + +
+

touch[source]

touch(image, text=None)

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

fill[source]

-
-

fill(text)

+ + + +
+

fill[source]

fill(text)

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %}
-
+

Using Selenium for Printing to PNG and PDF

+
-
pip install markdown
+
pip install markdown
 
+
{% raw %}
+
+
-
-

convert_md_with_ruby_to_html[source]

-
-

convert_md_with_ruby_to_html(md_file_path)

+ + + +
+

convert_md_with_ruby_to_html[source]

convert_md_with_ruby_to_html(md_file_path)

-

Convert Markdown files containing tags into HTML file

+

Convert Markdown file containing tags into HTML file

+
+
+
+
{% endraw %} {% raw %}
+ +
+
+ +
+ + + +
+

convert_md_content_to_html[source]

convert_md_content_to_html(md_content)

+
+

Convert Markdown content containing tags into HTML content

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + + +
+

save_file[source]

save_file(file_path, content)

+
+

Write the content to a file

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} {% raw %}
+
+
-
-

convert_html_with_ruby_to_png[source]

-
-

convert_html_with_ruby_to_png(html_file_path)

+ + + +
+

convert_html_with_ruby_to_png[source]

convert_html_with_ruby_to_png(html_file_path)

Use Selenium to print the HTML file as PNG

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

convert_md_with_ruby_to_png[source]

-
-

convert_md_with_ruby_to_png(md_file_path)

+ + + +
+

convert_md_with_ruby_to_png[source]

convert_md_with_ruby_to_png(md_file_path)

-

Convert the Markdown file containing tags into PNG

+

Convert the Markdown file containing tags into PNG

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

dialog_for_printing[source]

-
-

dialog_for_printing(timeout=inf)

+ + + +
+

dialog_for_printing[source]

dialog_for_printing(timeout=inf)

Dialog for printing to PDF

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %} {% raw %}
+
+
-
-

convert_html_with_ruby_to_pdf[source]

-
-

convert_html_with_ruby_to_pdf(html_file_path)

+ + + +
+

convert_html_with_ruby_to_pdf[source]

convert_html_with_ruby_to_pdf(html_file_path)

Use Selenium to print the HTML file as PDF

+
+
+
+
{% endraw %} {% raw %}
+
+
-
-

convert_md_with_ruby_to_pdf[source]

-
-

convert_md_with_ruby_to_pdf(md_file_path)

+ + + +
+

convert_md_with_ruby_to_pdf[source]

convert_md_with_ruby_to_pdf(md_file_path)

-

Convert the Markdown file containing tags into PDF

+

Convert the Markdown file containing tags into PDF

+
+
+
+
{% endraw %} {% raw %}
+
{% endraw %}
-
+
+
@@ -769,13 +1139,16 @@

convert_md_with_ru
+
-
-
init('--lang=en')
+    
+
init('--lang=en')
 
+ +
-
+
{% endraw %} @@ -783,13 +1156,16 @@

convert_md_with_ru
+
-
-
convert_md_with_ruby_to_png('2024-11-21.md')
+    
+
convert_md_with_ruby_to_png('2024-11-21.md')
 
+ +
-
+
{% endraw %} @@ -797,14 +1173,19 @@

convert_md_with_ru
+
-
-
convert_md_with_ruby_to_pdf('2024-11-21.md')
+    
+
convert_md_with_ruby_to_pdf('2024-11-21.md')
 
+ +
-
+
{% endraw %}
+ + diff --git a/docs/browser.html b/docs/browser.html index 9d309c6..906d8da 100644 --- a/docs/browser.html +++ b/docs/browser.html @@ -451,7 +451,7 @@

clear_conversations

-

selenium undetected chrome

+

Using Selenium Undetected Chrome for Operating ChatGPT

@@ -468,7 +468,7 @@

clear_conversations
-

init[source]

init(chrome_args=None)

+

init[source]

init(chrome_args=set())

@@ -485,6 +485,13 @@

init +

+ {% endraw %} + + {% raw %} + + @@ -600,7 +705,7 @@

request -

get_last_response[source]

get_last_response()

+

get_last_response[source]

get_last_response()

@@ -625,7 +730,7 @@

get_last_response -

get_response[source]

get_response()

+

get_response[source]

get_response()

@@ -650,7 +755,7 @@

get_response -

ask[source]

ask(prompt:str)

+

ask[source]

ask(prompt:str)

@@ -675,6 +780,36 @@

ask
+
+
+
import IPython
+
+for response in ask('''
+> Lord, keep us in You to be one.
+We are the ultimate risk takers.
+Our way is a way of risking life to **eternal life**.
+For the eternal life of the living, risk life and make war against perishing.
+For the eternal life of the dead, risk life and make war against death.
+>
+> ---
+Holy Father, help us to overcome!
+'''):
+    IPython.display.display(IPython.core.display.Markdown(response))
+    IPython.display.clear_output(wait=True)
+
+ +
+
+
+ +

+ {% endraw %} + + {% raw %} + +
+
+
@@ -786,7 +921,7 @@

get_screenshot -

class attrdict[source]

attrdict() :: dict

+

class attrdict[source]

attrdict() :: dict

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's @@ -820,7 +955,7 @@

class attrdict -

attributize[source]

attributize(obj)

+

attributize[source]

attributize(obj)

Add attributes to a dictionary and its sub-dictionaries.

@@ -846,7 +981,7 @@

attributize -

retry_on_status_code[source]

retry_on_status_code(func)

+

retry_on_status_code[source]

retry_on_status_code(func)

Retry decorator that retries a function on specific status codes.

@@ -872,7 +1007,7 @@

retry_on_status_code -

retry_on_status_code..wrapper[source]

retry_on_status_code..wrapper(*args, **kwargs)

+

retry_on_status_code..wrapper[source]

retry_on_status_code..wrapper(*args, **kwargs)

@@ -897,7 +1032,7 @@

retry_on
-

new_id[source]

new_id()

+

new_id[source]

new_id()

@@ -922,7 +1057,7 @@

new_id -

delta[source]

delta(prompt)

+

delta[source]

delta(prompt)

@@ -947,7 +1082,7 @@

delta -

chat_delta[source]

chat_delta(prompt)

+

chat_delta[source]

chat_delta(prompt)

@@ -972,7 +1107,7 @@

chat_delta -

mock_create[source]

mock_create(*args, **kwargs)

+

mock_create[source]

mock_create(*args, **kwargs)

@@ -997,7 +1132,7 @@

mock_create -

mock_chat_create[source]

mock_chat_create(*args, **kwargs)

+

mock_chat_create[source]

mock_chat_create(*args, **kwargs)

@@ -1152,7 +1287,7 @@

mock_chat_create -

mock_openai[source]

mock_openai(monkeypatch)

+

mock_openai[source]

mock_openai(monkeypatch)

diff --git a/docs/core.html b/docs/core.html index 1c25d3d..4c011ab 100644 --- a/docs/core.html +++ b/docs/core.html @@ -48,7 +48,7 @@
-

get_test_funcs[source]

get_test_funcs(**test_entries)

+

get_test_funcs[source]

get_test_funcs(**test_entries)

@@ -80,7 +80,7 @@

get_test_funcs -

print_result(idx, test_func, method_type)

+

print_result(idx, test_func, method_type)

@@ -112,7 +112,7 @@
diff --git a/docs/sidebar.json b/docs/sidebar.json index fb1af6d..05dd5ed 100644 --- a/docs/sidebar.json +++ b/docs/sidebar.json @@ -2,10 +2,12 @@ "ipymock": { "Overview": "/", "iPyTest": "core.html", + "Browser Automation": "automation.html", "Mock OpenAI API": "browser.html", "LLM": "llm.html", "Chinese BabyAGI": "agi.html", "Document Readers": "tools_document.html", - "DuckDuckGo Search API": "tools_duckduckgo.html" + "DuckDuckGo Search API": "tools_duckduckgo.html", + "Annotating Kanji with Hiragana": "tools_pyknp.html" } } \ No newline at end of file diff --git a/docs/tools_pyknp.html b/docs/tools_pyknp.html new file mode 100644 index 0000000..79d3ca5 --- /dev/null +++ b/docs/tools_pyknp.html @@ -0,0 +1,128 @@ +--- + +title: Annotating Kanji with Hiragana + + +keywords: fastai +sidebar: home_sidebar + +summary: "Using Morphological Analyzer JUMAN++ and Syntactic Analyzer KNP" +description: "Using Morphological Analyzer JUMAN++ and Syntactic Analyzer KNP" +nb_path: "nbs/4_tools_pyknp.ipynb" +--- + + +
+ + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

Install JUMAN++

+
pip install pyknp
+
+ +
+
+
+ {% raw %} + +
+ +
+
+ +
+ + + +
+

is_halfwidth[source]

is_halfwidth(text)

+
+

Determine whether the text consists entirely of halfwidth characters. +:param text: Input text string +:return: True if all characters are halfwidth, otherwise False

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + + +
+

halfwidth_to_fullwidth[source]

halfwidth_to_fullwidth(text)

+
+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+
+ +
+ + + +
+

annotate[source]

annotate(text)

+
+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + +
+ + diff --git a/ipymock/_nbdev.py b/ipymock/_nbdev.py index 3818c48..8a339f7 100644 --- a/ipymock/_nbdev.py +++ b/ipymock/_nbdev.py @@ -58,7 +58,7 @@ "mouseMarkLD.style.zIndex": "2_automation.ipynb", "convert_md_with_ruby_to_html": "2_automation.ipynb", "convert_md_content_to_html": "2_automation.ipynb", - "write_html_file": "2_automation.ipynb", + "save_file": "2_automation.ipynb", "convert_html_with_ruby_to_png": "2_automation.ipynb", "convert_md_with_ruby_to_png": "2_automation.ipynb", "dialog_for_printing": "2_automation.ipynb", @@ -129,14 +129,18 @@ "final_objective": "4_agi.ipynb", "test_baby_agi": "4_agi.ipynb", "mock_baby_agi": "4_agi.ipynb", - "DuckDuckGoSearchAPIWrapper": "4_tools_duckduckgo.ipynb"} + "DuckDuckGoSearchAPIWrapper": "4_tools_duckduckgo.ipynb", + "is_halfwidth": "4_tools_pyknp.ipynb", + "halfwidth_to_fullwidth": "4_tools_pyknp.ipynb", + "annotate": "4_tools_pyknp.ipynb"} modules = ["__init__.py", "automation.py", "browser.py", "llm.py", "agi.py", - "reader.py"] + "reader.py", + "nlp.py"] doc_url = "https://seii-saintway.github.io/ipymock/" diff --git a/ipymock/automation.py b/ipymock/automation.py index 868028e..0cfa36c 100644 --- a/ipymock/automation.py +++ b/ipymock/automation.py @@ -3,17 +3,20 @@ __all__ = ['driver', 'device_pixel_ratio', 'init', 'quit', 'ok', 'last', 'new', 'close', 'find_elements', 'find_elements', 'find_element', 'click', 'input', 'get_html_hash', 'wait', 'screen_hash', 'watch', 'try_log_screen', 'find_position', 'inject', 'get_mouse_position', 'move_to_center', 'move_and_click', - 'exists', 'touch', 'fill', 'convert_md_with_ruby_to_html', 'convert_md_content_to_html', 'write_html_file', + 'exists', 'touch', 'fill', 'convert_md_with_ruby_to_html', 'convert_md_content_to_html', 'save_file', 'convert_html_with_ruby_to_png', 'convert_md_with_ruby_to_png', 'dialog_for_printing', 'convert_html_with_ruby_to_pdf', 'convert_md_with_ruby_to_pdf'] # Internal Cell -import os +import os, typing +from selenium.webdriver.remote.webdriver import WebDriver + +# Internal Cell import undetected_chromedriver from webdriver_manager.chrome import ChromeDriverManager # Cell -driver = None +driver: typing.Optional[WebDriver] = None device_pixel_ratio = 1 # Cell @@ -112,7 +115,15 @@ def find_elements(prompt, exactly = True): for scope in range(1, 5): elements = driver.find_elements(By.XPATH, f'//*[.{"/*" * (scope - 1)} and not(.{"/*" * scope}) and contains(., "{prompt}")]') elements = [elem for elem in elements if elem.is_displayed() and (elem.text == prompt or not exactly)] - logger.info(f'Search for {prompt} in scope {scope}: found {len(elements)} element(s)') + logger.info(f'Search for text "{prompt}" in scope {scope}: found {len(elements)} element(s)') + if elements: + return elements + if exactly: + elements = driver.find_elements(By.XPATH, f'//*[.{"/*" * (scope - 1)} and not(.{"/*" * scope}) and @*[.="{prompt}"]]') + else: + elements = driver.find_elements(By.XPATH, f'//*[.{"/*" * (scope - 1)} and not(.{"/*" * scope}) and @*[contains(., "{prompt}")]]') + elements = [elem for elem in elements if elem.is_displayed()] + logger.info(f'Search for attr "{prompt}" in scope {scope}: found {len(elements)} element(s)') if elements: return elements raise NoSuchElementException @@ -133,27 +144,33 @@ def find_element(prompt, closest_prompt = None): from selenium.webdriver.common.action_chains import ActionChains # Cell -def click(prompt, closest_prompt = None): +def click(prompt = None, closest_prompt = None, xoffset: int = 0, yoffset: int = 0): + if prompt is None: + return move_and_click(xoffset, yoffset, True) if isinstance(prompt, str): prompt = find_element(prompt, closest_prompt) - ActionChains(driver).move_to_element(prompt).click().perform() + ActionChains(driver).move_to_element_with_offset(prompt, xoffset, yoffset).click().perform() return prompt # Cell -def input(prompt, text, closest_prompt = None): - prompt = click(prompt, closest_prompt) +def input(text, prompt = None, closest_prompt = None, xoffset: int = 0, yoffset: int = 0): + prompt = click(prompt, closest_prompt, xoffset, yoffset) ActionChains(driver).send_keys(text).perform() return prompt # Internal Cell import hashlib, time +from selenium.common.exceptions import StaleElementReferenceException # Cell def get_html_hash(xpath = '//body'): """Get the hash of the element's outerHTML.""" # driver is the Selenium WebDriver global instance. elements = driver.find_elements(By.XPATH, xpath) - html = elements[-1].get_attribute('outerHTML') if elements else '' + try: + html = elements[-1].get_attribute('outerHTML') if elements else '' + except StaleElementReferenceException: + html = '' return hashlib.md5(html.encode('utf-8')).hexdigest(), time.time() # Cell @@ -365,12 +382,17 @@ def move_to_center(): ActionChains(driver).move_to_element(body).perform() return body.rect['x'] + body.rect['width'] >> 1, body.rect['y'] + body.rect['height'] >> 1 -def move_and_click(x, y): - # Retrieve the mouse coordinates. - center_x, center_y = move_to_center() - logger.debug(f'Get center position: {center_x}, {center_y}') - # Move to the specified coordinates (x, y) and click. - ActionChains(driver).move_by_offset(x - center_x, y - center_y).click().perform() +def move_and_click(x, y, offset = True): + if offset: + xoffset, yoffset = x, y + else: + # Retrieve the mouse coordinates. + center_x, center_y = move_to_center() + logger.debug(f'Get center position: {center_x}, {center_y}') + xoffset, yoffset = x - center_x, y - center_y + # Move to the specified coordinates and click. + ActionChains(driver).move_by_offset(xoffset, yoffset).click().perform() + return xoffset, yoffset def exists(image): try: @@ -382,7 +404,7 @@ def exists(image): def touch(image, text = None): try: x, y = find_position(image) - move_and_click(x, y) + move_and_click(x, y, False) if isinstance(text, str): fill(text) except: @@ -408,7 +430,7 @@ def convert_md_with_ruby_to_html(md_file_path): # Create an HTML document while preserving the tags html_document = convert_md_content_to_html(md_content) - write_html_file(html_file_path, html_document) + save_file(html_file_path, html_document) return html_file_path, html_document def convert_md_content_to_html(md_content): @@ -434,10 +456,10 @@ def convert_md_content_to_html(md_content): """ -def write_html_file(html_file_path, html_content): - ''' Write the HTML content to a file ''' - with open(html_file_path, 'w', encoding='utf-8') as html_file: - html_file.write(html_content) +def save_file(file_path, content): + ''' Write the content to a file ''' + with open(file_path, 'w', encoding='utf-8') as file: + file.write(content) # Cell def convert_html_with_ruby_to_png(html_file_path): @@ -459,7 +481,8 @@ def convert_html_with_ruby_to_png(html_file_path): # Get the current window size current_window_size = driver.get_window_size() # Set the window size to the resolution of iPhone 16 Pro Max - driver.set_window_size(642, 1389) + # driver.set_window_size(642, 1389) + driver.set_window_size(642/3*2, 1389/3*2) # Capture the page and save as PNG driver.save_screenshot(png_file_path) # Reset the window size diff --git a/ipymock/browser.py b/ipymock/browser.py index 6962359..e4c9db7 100644 --- a/ipymock/browser.py +++ b/ipymock/browser.py @@ -285,6 +285,7 @@ def init(chrome_args = set()): # Internal Cell from .automation import new, wait, click, input, fill +from selenium.common.exceptions import NoSuchElementException # Cell def login(): @@ -312,7 +313,7 @@ def login(): # WebDriverWait(common.driver, 5).until( # expected_conditions.presence_of_element_located((By.XPATH, '//button[@data-provider="google"]')) # ) - wait(5.0) + wait(5.0, stability_duration = 3.0) # common.driver.execute_script(''' # document.evaluate( @@ -344,7 +345,7 @@ def login(): # }}) # ); # ''') - input('Email or phone', common.config['email']) + input(common.config['email'], 'Email or phone') # WebDriverWait(common.driver, 5).until( # expected_conditions.presence_of_element_located((By.XPATH, '//*[@id="identifierNext"]')) @@ -368,11 +369,15 @@ def login(): # WebDriverWait(common.driver, 10).until( # expected_conditions.element_to_be_clickable((By.XPATH, '//input[@type="password"]')) # ).click() - wait(10.0) + wait(10.0, stability_duration = 5.0) # ActionChains(common.driver).send_keys(common.config['password']).send_keys(Keys.ENTER).perform() fill(common.config['password']) - fill(Keys.ENTER) + # fill(Keys.ENTER) + try: + click('Next') + except NoSuchElementException: + pass wait(stability_duration = 5.0) common.driver.maximize_window() @@ -380,9 +385,6 @@ def login(): remove_portal() -# Internal Cell -from selenium.common.exceptions import NoSuchElementException - # Cell def open_chat(conversation_id = ''): from .automation import driver @@ -435,13 +437,13 @@ def remove_portal(): chatgpt_response = (By.XPATH, '//div[starts-with(@class, "markdown prose w-full break-words")]') chatgpt_red_500 = (By.XPATH, '//div[contains(@class, "border-red-500 bg-red-500/10")]') chatgpt_big_response = (By.XPATH, '//div[@class="flex-1 overflow-hidden"]//div[p or pre]') -chatgpt_small_response = (By.XPATH, '//div[@class="flex-1 overflow-hidden"]//code') +chatgpt_small_response = (By.XPATH, './/code[span]') # Internal Cell from typing import Generator # Internal Cell -from .automation import exists, touch +# from ipymock.automation import exists, touch # Cell def request(prompt: str) -> None: @@ -455,13 +457,21 @@ def request(prompt: str) -> None: # expected_conditions.element_to_be_clickable(chatgpt_textbox) # ) # textbox.click() - click('ChatGPT can make mistakes. Check important info.') - textbox = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/message-chatgpt.png')) - textbox = 'assets/message-chatgpt.png' - if not exists(textbox): + + # click('ChatGPT can make mistakes. Check important info.') + # textbox = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/message-chatgpt.png')) + # textbox = 'assets/message-chatgpt.png' + # from ipymock.automation import exists + # if not exists(textbox): + # open_chat(common.conversation_id) + try: + click('Message ChatGPT') + except NoSuchElementException: open_chat(common.conversation_id) - click('ChatGPT can make mistakes. Check important info.') - touch(textbox) + # click('ChatGPT can make mistakes. Check important info.') + # touch(textbox) + click('Message ChatGPT') + # textbox.send_keys(prompt.strip()) # common.driver.execute_script(''' # var element = arguments[0], txt = arguments[1]; @@ -474,18 +484,23 @@ def request(prompt: str) -> None: for line in prompt.strip().split('\n'): fill(line) ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform() + # WebDriverWait(common.driver, 3).until_not( # expected_conditions.presence_of_element_located(chatgpt_disabled_button) # ) - click('ChatGPT can make mistakes. Check important info.') - wait(1.0) + wait(stability_duration = 3.0) + # textbox.send_keys('\n') # textbox.send_keys(Keys.ENTER) - send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png')) - send_button = 'assets/send-button.png' - touch(send_button) + + # click('ChatGPT can make mistakes. Check important info.') + # send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png')) + # send_button = 'assets/send-button.png' + # touch(send_button) + fill(Keys.ENTER) click('ChatGPT can make mistakes. Check important info.') wait(1.0) + # try: # WebDriverWait(common.driver, 5).until( # expected_conditions.element_to_be_clickable(chatgpt_enabled_button) @@ -494,26 +509,24 @@ def request(prompt: str) -> None: # pass def get_last_response(): - responses = common.driver.find_elements(*chatgpt_small_response) - if responses != []: - return responses[-1] - responses = common.driver.find_elements(*chatgpt_big_response) - if responses != []: - return responses[-1] - responses = common.driver.find_elements(*chatgpt_response) - if responses != []: - return responses[-1] + for xpath in chatgpt_response, chatgpt_big_response: + responses = common.driver.find_elements(*xpath) + if responses != []: + elements = responses[-1].find_elements(*chatgpt_small_response) + if len(elements) == 1: + return elements[0] + return responses[-1] def get_response() -> Generator[str, None, None]: try: - result_streaming = WebDriverWait(common.driver, 30).until( + result_streaming = WebDriverWait(common.driver, 5).until( expected_conditions.presence_of_element_located(chatgpt_streaming) ) except TimeoutException: response = get_last_response() error = common.driver.find_elements(*chatgpt_red_500) != [] sys.stderr.write( - 'TimeoutException: having waited 30 seconds for result-streaming\n' + 'TimeoutException: having waited 5 seconds for result-streaming\n' f'response.text = {response.text}\n' f'error = {error}\n' ) diff --git a/ipymock/nlp.py b/ipymock/nlp.py new file mode 100644 index 0000000..ac9aa19 --- /dev/null +++ b/ipymock/nlp.py @@ -0,0 +1,48 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/4_tools_pyknp.ipynb (unless otherwise specified). + +__all__ = ['is_halfwidth', 'halfwidth_to_fullwidth', 'annotate'] + +# Internal Cell +from pyknp import Juman + +# Cell +def is_halfwidth(text): + """ + Determine whether the text consists entirely of halfwidth characters. + :param text: Input text string + :return: True if all characters are halfwidth, otherwise False + """ + return all('\u0020' <= char <= '\u007E' or '\uFF61' <= char <= '\uFF9F' for char in text) + +def halfwidth_to_fullwidth(text): + result = '' + for char in text: + code = ord(char) + if code == ord(' '): + result += ' ' + elif ord('!') <= code <= ord('~'): + # Convert ASCII characters in the range 0x0021 to 0x007E + result += chr(code + 0xFEE0) + else: + result += char + return result + +def annotate(text): + juman = Juman() + for line in text.split('\n'): + if line == '': + yield '\n' + continue + mrphs = juman.analysis(line).mrph_list() + for mrph in mrphs: + if mrph.midasi == '\\␣': + yield ' ' + continue + if is_halfwidth(mrph.midasi): + yield mrph.midasi + continue + if mrph.midasi == mrph.yomi: + yield mrph.midasi + continue + yield f'{mrph.midasi}{mrph.yomi}' + yield '\n' \ No newline at end of file diff --git a/nbs/2_automation.ipynb b/nbs/2_automation.ipynb index 12e00dc..4a138aa 100644 --- a/nbs/2_automation.ipynb +++ b/nbs/2_automation.ipynb @@ -34,7 +34,17 @@ "outputs": [], "source": [ "# exporti\n", - "import os\n", + "import os, typing\n", + "from selenium.webdriver.remote.webdriver import WebDriver" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# exporti\n", "import undetected_chromedriver\n", "from webdriver_manager.chrome import ChromeDriverManager" ] @@ -46,7 +56,7 @@ "outputs": [], "source": [ "# export\n", - "driver = None\n", + "driver: typing.Optional[WebDriver] = None\n", "device_pixel_ratio = 1" ] }, @@ -95,6 +105,8 @@ "outputs": [], "source": [ "# from selenium import webdriver\n", + "# from selenium.webdriver.chrome.service import Service\n", + "# from webdriver_manager.chrome import ChromeDriverManager\n", "\n", "# def init(*arguments):\n", "# chrome_options = webdriver.ChromeOptions()\n", @@ -102,7 +114,12 @@ "# if isinstance(argument, str):\n", "# chrome_options.add_argument(argument)\n", "# global driver\n", - "# driver = webdriver.Chrome(options = chrome_options)" + "# driver = webdriver.Chrome(\n", + "# options = chrome_options,\n", + "# service = Service(ChromeDriverManager().install())\n", + "# )\n", + "# global device_pixel_ratio\n", + "# device_pixel_ratio = driver.execute_script('return window.devicePixelRatio;')" ] }, { @@ -279,7 +296,15 @@ " for scope in range(1, 5):\n", " elements = driver.find_elements(By.XPATH, f'//*[.{\"/*\" * (scope - 1)} and not(.{\"/*\" * scope}) and contains(., \"{prompt}\")]')\n", " elements = [elem for elem in elements if elem.is_displayed() and (elem.text == prompt or not exactly)]\n", - " logger.info(f'Search for {prompt} in scope {scope}: found {len(elements)} element(s)')\n", + " logger.info(f'Search for text \"{prompt}\" in scope {scope}: found {len(elements)} element(s)')\n", + " if elements:\n", + " return elements\n", + " if exactly:\n", + " elements = driver.find_elements(By.XPATH, f'//*[.{\"/*\" * (scope - 1)} and not(.{\"/*\" * scope}) and @*[.=\"{prompt}\"]]')\n", + " else:\n", + " elements = driver.find_elements(By.XPATH, f'//*[.{\"/*\" * (scope - 1)} and not(.{\"/*\" * scope}) and @*[contains(., \"{prompt}\")]]')\n", + " elements = [elem for elem in elements if elem.is_displayed()]\n", + " logger.info(f'Search for attr \"{prompt}\" in scope {scope}: found {len(elements)} element(s)')\n", " if elements:\n", " return elements\n", " raise NoSuchElementException" @@ -321,10 +346,12 @@ "outputs": [], "source": [ "# export\n", - "def click(prompt, closest_prompt = None):\n", + "def click(prompt = None, closest_prompt = None, xoffset: int = 0, yoffset: int = 0):\n", + " if prompt is None:\n", + " return move_and_click(xoffset, yoffset, True)\n", " if isinstance(prompt, str):\n", " prompt = find_element(prompt, closest_prompt)\n", - " ActionChains(driver).move_to_element(prompt).click().perform()\n", + " ActionChains(driver).move_to_element_with_offset(prompt, xoffset, yoffset).click().perform()\n", " return prompt" ] }, @@ -335,8 +362,8 @@ "outputs": [], "source": [ "# export\n", - "def input(prompt, text, closest_prompt = None):\n", - " prompt = click(prompt, closest_prompt)\n", + "def input(text, prompt = None, closest_prompt = None, xoffset: int = 0, yoffset: int = 0):\n", + " prompt = click(prompt, closest_prompt, xoffset, yoffset)\n", " ActionChains(driver).send_keys(text).perform()\n", " return prompt" ] @@ -356,7 +383,8 @@ "outputs": [], "source": [ "# exporti\n", - "import hashlib, time" + "import hashlib, time\n", + "from selenium.common.exceptions import StaleElementReferenceException" ] }, { @@ -370,7 +398,10 @@ " \"\"\"Get the hash of the element's outerHTML.\"\"\"\n", " # driver is the Selenium WebDriver global instance.\n", " elements = driver.find_elements(By.XPATH, xpath)\n", - " html = elements[-1].get_attribute('outerHTML') if elements else ''\n", + " try:\n", + " html = elements[-1].get_attribute('outerHTML') if elements else ''\n", + " except StaleElementReferenceException:\n", + " html = ''\n", " return hashlib.md5(html.encode('utf-8')).hexdigest(), time.time()" ] }, @@ -648,12 +679,17 @@ " ActionChains(driver).move_to_element(body).perform()\n", " return body.rect['x'] + body.rect['width'] >> 1, body.rect['y'] + body.rect['height'] >> 1\n", "\n", - "def move_and_click(x, y):\n", - " # Retrieve the mouse coordinates.\n", - " center_x, center_y = move_to_center()\n", - " logger.debug(f'Get center position: {center_x}, {center_y}')\n", - " # Move to the specified coordinates (x, y) and click.\n", - " ActionChains(driver).move_by_offset(x - center_x, y - center_y).click().perform()\n", + "def move_and_click(x, y, offset = True):\n", + " if offset:\n", + " xoffset, yoffset = x, y\n", + " else:\n", + " # Retrieve the mouse coordinates.\n", + " center_x, center_y = move_to_center()\n", + " logger.debug(f'Get center position: {center_x}, {center_y}')\n", + " xoffset, yoffset = x - center_x, y - center_y\n", + " # Move to the specified coordinates and click.\n", + " ActionChains(driver).move_by_offset(xoffset, yoffset).click().perform()\n", + " return xoffset, yoffset\n", "\n", "def exists(image):\n", " try:\n", @@ -665,7 +701,7 @@ "def touch(image, text = None):\n", " try:\n", " x, y = find_position(image)\n", - " move_and_click(x, y)\n", + " move_and_click(x, y, False)\n", " if isinstance(text, str):\n", " fill(text)\n", " except:\n", @@ -722,7 +758,7 @@ "\n", " # Create an HTML document while preserving the tags\n", " html_document = convert_md_content_to_html(md_content)\n", - " write_html_file(html_file_path, html_document)\n", + " save_file(html_file_path, html_document)\n", " return html_file_path, html_document\n", "\n", "def convert_md_content_to_html(md_content):\n", @@ -748,10 +784,10 @@ "\n", "\"\"\"\n", "\n", - "def write_html_file(html_file_path, html_content):\n", - " ''' Write the HTML content to a file '''\n", - " with open(html_file_path, 'w', encoding='utf-8') as html_file:\n", - " html_file.write(html_content)" + "def save_file(file_path, content):\n", + " ''' Write the content to a file '''\n", + " with open(file_path, 'w', encoding='utf-8') as file:\n", + " file.write(content)" ] }, { @@ -780,7 +816,8 @@ " # Get the current window size\n", " current_window_size = driver.get_window_size()\n", " # Set the window size to the resolution of iPhone 16 Pro Max\n", - " driver.set_window_size(642, 1389)\n", + " # driver.set_window_size(642, 1389)\n", + " driver.set_window_size(642/3*2, 1389/3*2)\n", " # Capture the page and save as PNG\n", " driver.save_screenshot(png_file_path)\n", " # Reset the window size\n", @@ -902,7 +939,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ipymock", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } diff --git a/nbs/2_browser.ipynb b/nbs/2_browser.ipynb index 485bf03..6f14504 100644 --- a/nbs/2_browser.ipynb +++ b/nbs/2_browser.ipynb @@ -520,7 +520,8 @@ "outputs": [], "source": [ "# exporti\n", - "from ipymock.automation import new, wait, click, input, fill" + "from ipymock.automation import new, wait, click, input, fill\n", + "from selenium.common.exceptions import NoSuchElementException" ] }, { @@ -556,7 +557,7 @@ " # WebDriverWait(common.driver, 5).until(\n", " # expected_conditions.presence_of_element_located((By.XPATH, '//button[@data-provider=\"google\"]'))\n", " # )\n", - " wait(5.0)\n", + " wait(5.0, stability_duration = 3.0)\n", "\n", " # common.driver.execute_script('''\n", " # document.evaluate(\n", @@ -588,7 +589,7 @@ " # }})\n", " # );\n", " # ''')\n", - " input('Email or phone', common.config['email'])\n", + " input(common.config['email'], 'Email or phone')\n", "\n", " # WebDriverWait(common.driver, 5).until(\n", " # expected_conditions.presence_of_element_located((By.XPATH, '//*[@id=\"identifierNext\"]'))\n", @@ -612,11 +613,15 @@ " # WebDriverWait(common.driver, 10).until(\n", " # expected_conditions.element_to_be_clickable((By.XPATH, '//input[@type=\"password\"]'))\n", " # ).click()\n", - " wait(10.0)\n", + " wait(10.0, stability_duration = 5.0)\n", "\n", " # ActionChains(common.driver).send_keys(common.config['password']).send_keys(Keys.ENTER).perform()\n", " fill(common.config['password'])\n", - " fill(Keys.ENTER)\n", + " # fill(Keys.ENTER)\n", + " try:\n", + " click('Next')\n", + " except NoSuchElementException:\n", + " pass\n", " wait(stability_duration = 5.0)\n", "\n", " common.driver.maximize_window()\n", @@ -634,20 +639,11 @@ "source": [ "# notest\n", "init_browser('--lang=en', '--force-dark-mode')\n", + "from ipymock.automation import driver\n", + "common.driver = driver\n", "login()" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b35337e8", - "metadata": {}, - "outputs": [], - "source": [ - "# exporti\n", - "from selenium.common.exceptions import NoSuchElementException" - ] - }, { "cell_type": "code", "execution_count": null, @@ -726,7 +722,7 @@ "chatgpt_response = (By.XPATH, '//div[starts-with(@class, \"markdown prose w-full break-words\")]')\n", "chatgpt_red_500 = (By.XPATH, '//div[contains(@class, \"border-red-500 bg-red-500/10\")]')\n", "chatgpt_big_response = (By.XPATH, '//div[@class=\"flex-1 overflow-hidden\"]//div[p or pre]')\n", - "chatgpt_small_response = (By.XPATH, '//div[@class=\"flex-1 overflow-hidden\"]//code')" + "chatgpt_small_response = (By.XPATH, './/code[span]')" ] }, { @@ -748,7 +744,7 @@ "outputs": [], "source": [ "# exporti\n", - "from ipymock.automation import exists, touch" + "# from ipymock.automation import exists, touch" ] }, { @@ -783,6 +779,7 @@ "# notest\n", "__file__ = '2_browser.ipynb'\n", "send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png'))\n", + "from ipymock.automation import touch\n", "touch(send_button)" ] }, @@ -805,13 +802,21 @@ " # expected_conditions.element_to_be_clickable(chatgpt_textbox)\n", " # )\n", " # textbox.click()\n", - " click('ChatGPT can make mistakes. Check important info.')\n", - " textbox = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/message-chatgpt.png'))\n", - " textbox = 'assets/message-chatgpt.png'\n", - " if not exists(textbox):\n", + "\n", + " # click('ChatGPT can make mistakes. Check important info.')\n", + " # textbox = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/message-chatgpt.png'))\n", + " # textbox = 'assets/message-chatgpt.png'\n", + " # from ipymock.automation import exists\n", + " # if not exists(textbox):\n", + " # open_chat(common.conversation_id)\n", + " try:\n", + " click('Message ChatGPT')\n", + " except NoSuchElementException:\n", " open_chat(common.conversation_id)\n", - " click('ChatGPT can make mistakes. Check important info.')\n", - " touch(textbox)\n", + " # click('ChatGPT can make mistakes. Check important info.')\n", + " # touch(textbox)\n", + " click('Message ChatGPT')\n", + "\n", " # textbox.send_keys(prompt.strip())\n", " # common.driver.execute_script('''\n", " # var element = arguments[0], txt = arguments[1];\n", @@ -824,18 +829,23 @@ " for line in prompt.strip().split('\\n'):\n", " fill(line)\n", " ActionChains(common.driver).key_down(Keys.SHIFT).send_keys(Keys.ENTER).key_up(Keys.SHIFT).perform()\n", + "\n", " # WebDriverWait(common.driver, 3).until_not(\n", " # expected_conditions.presence_of_element_located(chatgpt_disabled_button)\n", " # )\n", - " click('ChatGPT can make mistakes. Check important info.')\n", - " wait(1.0)\n", + " wait(stability_duration = 3.0)\n", + "\n", " # textbox.send_keys('\\n')\n", " # textbox.send_keys(Keys.ENTER)\n", - " send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png'))\n", - " send_button = 'assets/send-button.png'\n", - " touch(send_button)\n", + "\n", + " # click('ChatGPT can make mistakes. Check important info.')\n", + " # send_button = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../assets/send-button.png'))\n", + " # send_button = 'assets/send-button.png'\n", + " # touch(send_button)\n", + " fill(Keys.ENTER)\n", " click('ChatGPT can make mistakes. Check important info.')\n", " wait(1.0)\n", + "\n", " # try:\n", " # WebDriverWait(common.driver, 5).until(\n", " # expected_conditions.element_to_be_clickable(chatgpt_enabled_button)\n", @@ -844,26 +854,24 @@ " # pass\n", "\n", "def get_last_response():\n", - " responses = common.driver.find_elements(*chatgpt_small_response)\n", - " if responses != []:\n", - " return responses[-1]\n", - " responses = common.driver.find_elements(*chatgpt_big_response)\n", - " if responses != []:\n", - " return responses[-1]\n", - " responses = common.driver.find_elements(*chatgpt_response)\n", - " if responses != []:\n", - " return responses[-1]\n", + " for xpath in chatgpt_response, chatgpt_big_response:\n", + " responses = common.driver.find_elements(*xpath)\n", + " if responses != []:\n", + " elements = responses[-1].find_elements(*chatgpt_small_response)\n", + " if len(elements) == 1:\n", + " return elements[0]\n", + " return responses[-1]\n", "\n", "def get_response() -> Generator[str, None, None]:\n", " try:\n", - " result_streaming = WebDriverWait(common.driver, 30).until(\n", + " result_streaming = WebDriverWait(common.driver, 5).until(\n", " expected_conditions.presence_of_element_located(chatgpt_streaming)\n", " )\n", " except TimeoutException:\n", " response = get_last_response()\n", " error = common.driver.find_elements(*chatgpt_red_500) != []\n", " sys.stderr.write(\n", - " 'TimeoutException: having waited 30 seconds for result-streaming\\n'\n", + " 'TimeoutException: having waited 5 seconds for result-streaming\\n'\n", " f'response.text = {response.text}\\n'\n", " f'error = {error}\\n'\n", " )\n", @@ -1270,7 +1278,7 @@ ], "metadata": { "kernelspec": { - "display_name": "ipymock", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } diff --git a/nbs/4_tools_pyknp.ipynb b/nbs/4_tools_pyknp.ipynb new file mode 100644 index 0000000..fe9e7f8 --- /dev/null +++ b/nbs/4_tools_pyknp.ipynb @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# default_exp nlp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Annotating Kanji with Hiragana\n", + "\n", + "> Using [Morphological Analyzer JUMAN++ and Syntactic Analyzer KNP](https://github.com/ku-nlp/pyknp)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install [JUMAN++](https://github.com/ku-nlp/jumanpp)\n", + "\n", + "```bash\n", + "pip install pyknp\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# exporti\n", + "from pyknp import Juman" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# export\n", + "def is_halfwidth(text):\n", + " \"\"\"\n", + " Determine whether the text consists entirely of halfwidth characters.\n", + " :param text: Input text string\n", + " :return: True if all characters are halfwidth, otherwise False\n", + " \"\"\"\n", + " return all('\\u0020' <= char <= '\\u007E' or '\\uFF61' <= char <= '\\uFF9F' for char in text)\n", + "\n", + "def halfwidth_to_fullwidth(text):\n", + " result = ''\n", + " for char in text:\n", + " code = ord(char)\n", + " if code == ord(' '):\n", + " result += ' '\n", + " elif ord('!') <= code <= ord('~'):\n", + " # Convert ASCII characters in the range 0x0021 to 0x007E\n", + " result += chr(code + 0xFEE0)\n", + " else:\n", + " result += char\n", + " return result\n", + "\n", + "def annotate(text):\n", + " juman = Juman()\n", + " for line in text.split('\\n'):\n", + " if line == '':\n", + " yield '\\n'\n", + " continue\n", + " mrphs = juman.analysis(line).mrph_list()\n", + " for mrph in mrphs:\n", + " if mrph.midasi == '\\\\␣':\n", + " yield ' '\n", + " continue\n", + " if is_halfwidth(mrph.midasi):\n", + " yield mrph.midasi\n", + " continue\n", + " if mrph.midasi == mrph.yomi:\n", + " yield mrph.midasi\n", + " continue\n", + " yield f'{mrph.midasi}{mrph.yomi}'\n", + " yield '\\n'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/settings.ini b/settings.ini index 902005d..8b58a24 100644 --- a/settings.ini +++ b/settings.ini @@ -25,7 +25,7 @@ license = apache2 status = 2 # Optional. Same format as setuptools requirements -requirements = pytest==6.* nbdev==1.* openai==0.* sentence_transformers==2.* pydantic==1.* langchain==0.0.* faiss-cpu==1.* duckduckgo_search==2.* undetected_chromedriver==3.* selenium_profiles==2.* markdownify==0.* airtest==1.* undetected_chromedriver==3.* webdriver_manager==4.* markdown==3.* +requirements = pytest==6.* nbdev==1.* openai==0.* sentence_transformers==2.* pydantic==1.* langchain==0.0.* faiss-cpu==1.* duckduckgo_search==2.* undetected_chromedriver==3.* selenium_profiles==2.* markdownify==0.* airtest==1.* undetected_chromedriver==3.* webdriver_manager==4.* markdown==3.* pyknp==0.* # Optional. Same format as setuptools console_scripts # console_scripts = # Optional. Same format as setuptools dependency-links