style: apply ruff formatting

BinWang28 · claude · BinWang28 · commit fdcbc6ba4792 · 2026-03-11T22:22:53.000+08:00
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/miroflow/agents/iterative_agent_with_rollback.py b/miroflow/agents/iterative_agent_with_rollback.py
@@ -215,9 +215,11 @@ async def run_internal(self, ctx: AgentContext) -> AgentContext:
             if self.verbose:
                 usage = getattr(llm_output, "usage", None)
                 if usage:
-                    print(f"[Turn {turn_count}] LLM returned | "
-                          f"prompt_tokens={getattr(usage, 'prompt_tokens', '?')}, "
-                          f"completion_tokens={getattr(usage, 'completion_tokens', '?')}")
+                    print(
+                        f"[Turn {turn_count}] LLM returned | "
+                        f"prompt_tokens={getattr(usage, 'prompt_tokens', '?')}, "
+                        f"completion_tokens={getattr(usage, 'completion_tokens', '?')}"
+                    )
                 else:
                     print(f"[Turn {turn_count}] LLM returned (no usage info)")
                 resp_preview = (llm_output.response_text or "")[:200]
@@ -238,11 +240,17 @@ async def run_internal(self, ctx: AgentContext) -> AgentContext:
             )[0]
 
             if self.verbose and tool_and_sub_agent_calls:
-                print(f"[Turn {turn_count}] Tool calls ({len(tool_and_sub_agent_calls)}):")
+                print(
+                    f"[Turn {turn_count}] Tool calls ({len(tool_and_sub_agent_calls)}):"
+                )
                 for i, call in enumerate(tool_and_sub_agent_calls):
-                    args_preview = json.dumps(call.get("arguments", {}), ensure_ascii=False)[:150]
-                    print(f"  [{i+1}] {call.get('server_name', '?')}::{call.get('tool_name', '?')} "
-                          f"args={args_preview}")
+                    args_preview = json.dumps(
+                        call.get("arguments", {}), ensure_ascii=False
+                    )[:150]
+                    print(
+                        f"  [{i+1}] {call.get('server_name', '?')}::{call.get('tool_name', '?')} "
+                        f"args={args_preview}"
+                    )
 
             # Check if rollback is needed
             should_rollback, rollback_reason = self._should_rollback(
@@ -263,9 +271,11 @@ async def run_internal(self, ctx: AgentContext) -> AgentContext:
                         f"max={self.max_consecutive_rollbacks}"
                     )
                     if self.verbose:
-                        print(f"[Turn {turn_count}] ROLLBACK #{consecutive_rollbacks}: "
-                              f"reason={rollback_reason}, "
-                              f"max={self.max_consecutive_rollbacks}")
+                        print(
+                            f"[Turn {turn_count}] ROLLBACK #{consecutive_rollbacks}: "
+                            f"reason={rollback_reason}, "
+                            f"max={self.max_consecutive_rollbacks}"
+                        )
                     continue  # Retry this turn
                 else:
                     # Normal completion or max rollback count reached
@@ -342,12 +352,18 @@ async def run_internal(self, ctx: AgentContext) -> AgentContext:
                 )
 
                 if self.verbose:
-                    print(f"[Turn {turn_count}] Tool results: "
-                          f"{len(tool_results)} tool, "
-                          f"{len(sub_agent_results)} sub-agent, "
-                          f"{len(skill_results)} skill")
+                    print(
+                        f"[Turn {turn_count}] Tool results: "
+                        f"{len(tool_results)} tool, "
+                        f"{len(sub_agent_results)} sub-agent, "
+                        f"{len(skill_results)} skill"
+                    )
                     for r in tool_results:
-                        result_preview = str(r.get("result", ""))[:200] if isinstance(r, dict) else str(r)[:200]
+                        result_preview = (
+                            str(r.get("result", ""))[:200]
+                            if isinstance(r, dict)
+                            else str(r)[:200]
+                        )
                         print(f"  -> {result_preview}")
 
                 # Record executed queries for duplicate detection
@@ -398,8 +414,10 @@ async def run_internal(self, ctx: AgentContext) -> AgentContext:
         if self.verbose:
             final_answer = output_processor_result.get("final_boxed_answer", None)
             print(f"\n{'='*60}")
-            print(f"[DONE] Total turns: {turn_count} | "
-                  f"task_failed={task_failed} | reached_limit={reached_limit}")
+            print(
+                f"[DONE] Total turns: {turn_count} | "
+                f"task_failed={task_failed} | reached_limit={reached_limit}"
+            )
             print(f"[DONE] Final answer: {str(final_answer)[:300]}")
             print(f"{'='*60}\n")
 
diff --git a/miroflow/tool/mcp_servers/scrape_website_v1.py b/miroflow/tool/mcp_servers/scrape_website_v1.py
@@ -126,9 +126,7 @@ def smart_split_content(text: str, chunk_size: int, overlap: int) -> List[str]:
                 best_break_rel = idx + len(pattern)
                 break
 
-        effective_end = (
-            search_start + best_break_rel if best_break_rel != -1 else end
-        )
+        effective_end = search_start + best_break_rel if best_break_rel != -1 else end
 
         # Ensure we always move forward at least by 10% of chunk size or at least 'overlap'
         # to avoid infinite loops if overlap is too large
@@ -215,9 +213,7 @@ def get_content_score(res: Dict[str, Any], query: str = "") -> int:
             "about",
         }
         query_words = {
-            w
-            for w in re.findall(r"\w{3,}", query.lower())
-            if w not in stop_words
+            w for w in re.findall(r"\w{3,}", query.lower()) if w not in stop_words
         }
 
         if query_words:
@@ -295,10 +291,7 @@ def get_prompt_with_truncation(
         # Calculate how many chars to KEEP (not how many to remove)
         keep_chars = max(len(content) - truncate_last_num_chars, 2000)
         if keep_chars < len(content):
-            content = (
-                content[:keep_chars]
-                + "\n[...truncated due to length limits]"
-            )
+            content = content[:keep_chars] + "\n[...truncated due to length limits]"
     return EXTRACT_INFO_PROMPT.format(info_to_extract, content)
 
 
@@ -555,16 +548,13 @@ async def scrape_url_with_firecrawl(
                             "error": "",
                             "char_count": total_char_count,
                             "line_count": total_line_count,
-                            "all_content_displayed": total_char_count
-                            <= max_chars,
+                            "all_content_displayed": total_char_count <= max_chars,
                             "last_char_line": displayed_content.count("\n") + 1
                             if displayed_content
                             else 0,
                         }
                     else:
-                        error_msg = res_data.get(
-                            "error", "Unknown Firecrawl error"
-                        )
+                        error_msg = res_data.get("error", "Unknown Firecrawl error")
                         if attempt < len(retry_delays):
                             await asyncio.sleep(delay)
                             continue
@@ -655,18 +645,14 @@ async def scrape_url_with_playwright(
             content_type = response.headers.get("content-type", "").lower()
             content = ""
 
-            if (
-                "application/pdf" in content_type
-                or url.lower().endswith(".pdf")
-            ):
+            if "application/pdf" in content_type or url.lower().endswith(".pdf"):
                 pdf_bytes = await response.body()
                 if PdfReader:
                     with io.BytesIO(pdf_bytes) as f:
                         reader = PdfReader(f)
                         pages_to_read = min(len(reader.pages), 50)
                         content = "\n".join(
-                            reader.pages[i].extract_text()
-                            for i in range(pages_to_read)
+                            reader.pages[i].extract_text() for i in range(pages_to_read)
                         )
                 else:
                     content = "PDF detected but pypdf is not installed."
@@ -837,12 +823,12 @@ async def call_robust_llm(
                             logger.warning(
                                 f"LLM: Context limit hit (attempt {attempt + 1}). Retrying with gradient truncation..."
                             )
-                            payload["messages"][0][
-                                "content"
-                            ] = get_prompt_with_truncation(
-                                info_for_truncation,
-                                original_content,
-                                truncate_last_num_chars=40960 * (attempt + 1),
+                            payload["messages"][0]["content"] = (
+                                get_prompt_with_truncation(
+                                    info_for_truncation,
+                                    original_content,
+                                    truncate_last_num_chars=40960 * (attempt + 1),
+                                )
                             )
                             await asyncio.sleep(delay)
                             continue
@@ -866,9 +852,7 @@ async def call_robust_llm(
                 return {
                     "success": True,
                     "extracted_info": output,
-                    "tokens_used": data.get("usage", {}).get(
-                        "total_tokens", 0
-                    ),
+                    "tokens_used": data.get("usage", {}).get("total_tokens", 0),
                 }
 
         except Exception as e:
@@ -951,19 +935,15 @@ async def scrape_and_extract_info(
 
     # Tier 2: Firecrawl (First fallback, highly reliable)
     if best_score < 5:
-        logger.info(
-            f"Jina quality low (Score: {best_score}). Trying Firecrawl..."
-        )
+        logger.info(f"Jina quality low (Score: {best_score}). Trying Firecrawl...")
         fc_res = await scrape_url_with_firecrawl(url)
         fc_score = get_content_score(fc_res, info_to_extract)
         if fc_score > best_score:
             best_res, best_method, best_score = fc_res, "Firecrawl", fc_score
 
     # Tier 3: Playwright
     if best_score < 5:
-        logger.info(
-            f"Current quality low (Score: {best_score}). Trying Playwright..."
-        )
+        logger.info(f"Current quality low (Score: {best_score}). Trying Playwright...")
         pw_res = await scrape_url_with_playwright(url)
         pw_score = get_content_score(pw_res, info_to_extract)
         if pw_score > best_score:
@@ -983,8 +963,10 @@ async def scrape_and_extract_info(
                 py_score,
             )
 
-    if not best_res or not best_res.get("success") or (
-        best_score < 1 and best_res.get("char_count", 0) < 50
+    if (
+        not best_res
+        or not best_res.get("success")
+        or (best_score < 1 and best_res.get("char_count", 0) < 50)
     ):
         return json.dumps(
             {
@@ -1031,9 +1013,7 @@ async def scrape_and_extract_info(
 
         async def sem_call_robust_llm(chunk_text):
             async with semaphore:
-                chunk_prompt = EXTRACT_INFO_PROMPT.format(
-                    info_to_extract, chunk_text
-                )
+                chunk_prompt = EXTRACT_INFO_PROMPT.format(info_to_extract, chunk_text)
                 return await call_robust_llm(
                     chunk_prompt,
                     temperature=0.2,
@@ -1042,9 +1022,7 @@ async def sem_call_robust_llm(chunk_text):
                 )
 
         # Map Phase: Parallel extraction with concurrency control
-        chunk_results = await asyncio.gather(
-            *(sem_call_robust_llm(c) for c in chunks)
-        )
+        chunk_results = await asyncio.gather(*(sem_call_robust_llm(c) for c in chunks))
 
         # Filter successful findings
         valid_partials = []
@@ -1056,9 +1034,7 @@ async def sem_call_robust_llm(chunk_text):
                 valid_partials.append(text)
 
         if not valid_partials:
-            logger.warning(
-                "No extracted information available from any chunk."
-            )
+            logger.warning("No extracted information available from any chunk.")
             final_info = (
                 "The requested information was not found in the provided document."
             )
@@ -1071,9 +1047,7 @@ async def sem_call_robust_llm(chunk_text):
             )
             formatted_fragments = ""
             for i, partial in enumerate(valid_partials):
-                formatted_fragments += (
-                    f"--- FRAGMENT {i + 1} ---\n{partial}\n\n"
-                )
+                formatted_fragments += f"--- FRAGMENT {i + 1} ---\n{partial}\n\n"
 
             reduce_prompt = REDUCE_PROMPT.format(
                 info=info_to_extract, partials=formatted_fragments
@@ -1094,10 +1068,7 @@ async def sem_call_robust_llm(chunk_text):
     verification_note = ""
     if final_info:
         info_lower = final_info.lower()
-        if (
-            "[confidence: low" in info_lower
-            or "requires verification" in info_lower
-        ):
+        if "[confidence: low" in info_lower or "requires verification" in info_lower:
             verification_note = "LOW CONFIDENCE: This information has low reliability. Strongly recommend verifying with additional independent sources before using."
         elif (
             "[confidence: medium" in info_lower