From 5dab01e84533277d03a2d0b512a2a2243b55abdb Mon Sep 17 00:00:00 2001 From: skypank Date: Sun, 22 Mar 2026 17:25:55 +0530 Subject: [PATCH] feat: add dynamic Sugar docs fetcher and /refresh-docs endpoint (fixes #91) --- .example.env | 7 + README.md | 227 ++++++++++++++++ app/ai.py | 10 +- app/routes/api.py | 70 +++++ docs/sugar-contributing.txt | 444 ++++++++++++++++++++++++++++++++ docs/sugar-desktop-activity.txt | 247 ++++++++++++++++++ docs/sugar-web-activity.txt | 288 +++++++++++++++++++++ requirements.txt | 1 + scripts/__init__.py | 3 + scripts/fetch_sugar_docs.py | 295 +++++++++++++++++++++ 10 files changed, 1591 insertions(+), 1 deletion(-) create mode 100644 docs/sugar-contributing.txt create mode 100644 docs/sugar-desktop-activity.txt create mode 100644 docs/sugar-web-activity.txt create mode 100644 scripts/__init__.py create mode 100644 scripts/fetch_sugar_docs.py diff --git a/.example.env b/.example.env index eee7a6b..7166350 100644 --- a/.example.env +++ b/.example.env @@ -33,3 +33,10 @@ SESSION_SECRET_KEY=your_secret_key WEBHOOK_SECRET=your_webhook_secret REPO_PATH_LOCALLY=/path/to/sugar-ai GIT_PATH=/usr/bin/git + +# GitHub API authentication (optional) +# Used when fetching Sugar documentation from GitHub +# Provides higher API rate limits (5000 requests/hour vs 60) +# Generate token at: https://github.com/settings/tokens +# Required scopes: public_repo (read-only access) +GITHUB_TOKEN=your_github_personal_access_token diff --git a/README.md b/README.md index 2fc373f..b94b2f3 100644 --- a/README.md +++ b/README.md @@ -499,6 +499,233 @@ Review the terminal output for further details and error messages. When deploying Sugar-AI in CI/CD pipelines, you'll need to configure environment variables properly. Current CI/CD uses github webhooks. So make sure to create a webhook secret and add it to the `.env`. 
+## Keeping RAG Docs Up to Date + +Sugar-AI includes a dynamic document fetching system to keep your RAG (Retrieval-Augmented Generation) documentation fresh and current. Instead of manually managing static documentation files, you can automatically fetch and index the latest Sugar documentation from GitHub. + +### Overview + +The dynamic document fetching system: +- Fetches documentation from Sugar Labs repositories on GitHub +- Converts markdown to clean plain text (strips header markup such as `#`, removes HTML tags, etc.) +- Adds metadata about the source URL and fetch timestamp +- Rebuilds the vector store for efficient retrieval +- Supports GitHub API authentication for higher rate limits + +### Manual Document Fetching + +To fetch and update documentation manually: + +#### Basic Usage (No Authentication) + +```bash +python scripts/fetch_sugar_docs.py +``` + +This will fetch all configured documentation sources and save them to the `docs/` directory. + +#### With GitHub Authentication + +For higher API rate limits, you can provide a GitHub personal access token: + +```bash +export GITHUB_TOKEN=your_github_personal_access_token +python scripts/fetch_sugar_docs.py +``` + +To generate a GitHub token: +1. Go to https://github.com/settings/tokens +2. Click "Generate new token (classic)" +3. Select scope: `public_repo` (read-only access) +4. 
Copy the token and use it as shown above + +#### Expected Output + +``` +============================================================ +SUGAR-AI DOCUMENT FETCH SUMMARY +============================================================ +Timestamp: 2026-03-21T10:30:45.123456 +Total documents attempted: 3 +Successfully fetched: 3 +Failed: 0 + +Fetched documents: + ✓ sugar-activity.txt + ✓ sugar-activity-tutorial.txt + ✓ hello-world-readme.txt + +============================================================ +Fetched 3 docs successfully +``` + +#### Handling Errors + +The script handles common errors gracefully: +- **404 Not Found**: If a documentation URL no longer exists +- **Network Failures**: Connection timeouts or network errors +- **Authentication Issues**: Invalid or expired GitHub tokens + +Failed documents are reported in the output, and remaining documents are still fetched and indexed. + +### Automated Document Refreshing via API + +#### Using the /refresh-docs Endpoint + +For automated updates, you can use the `/refresh-docs` endpoint. This requires admin permissions (`can_change_model: true` in your API key configuration). 
+ +#### Example Request + +```bash +curl -X POST "http://localhost:8000/refresh-docs" \ + -H "X-API-Key: sugarai2024" +``` + +#### Example Response + +```json +{ + "status": "success", + "docs_refreshed": [ + "sugar-activity.txt", + "sugar-activity-tutorial.txt", + "hello-world-readme.txt" + ], + "vectorstore_rebuilt": true, + "timestamp": "2026-03-21T10:45:30.123456", + "total_docs_count": 6 +} +``` + +#### Using OAuth Authentication + +If you're authenticated via OAuth with admin permissions: + +```bash +# Using OAuth session (admin with can_change_model: true) +curl -X POST "http://localhost:8000/refresh-docs" \ + -H "Cookie: session=your_session_cookie" +``` + +#### Error Handling + +If the refresh fails, the endpoint returns an error response: + +```json +{ + "detail": "Failed to fetch some documents: Document not found (404): https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/sugar-activity.md" +} +``` + +### Configuration + +The documentation sources are defined in `scripts/fetch_sugar_docs.py`: + +```python +DOCS_TO_FETCH = [ + { + "url": "https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/sugar-activity.md", + "filename": "sugar-activity.txt" + }, + { + "url": "https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/sugar-activity-tutorial.md", + "filename": "sugar-activity-tutorial.txt" + }, + { + "url": "https://raw.githubusercontent.com/sugarlabs/hello-world/master/README.md", + "filename": "hello-world-readme.txt" + } +] +``` + +To add more documentation sources, edit this list with additional `url` and `filename` pairs. 
+ +### Scheduling Document Updates + +#### Using Cron (Unix/Linux/macOS) + +Schedule automatic document updates daily at 2 AM: + +```bash +0 2 * * * cd /path/to/sugar-ai && GITHUB_TOKEN=your_token python scripts/fetch_sugar_docs.py +``` + +#### Using GitHub Actions + +Create `.github/workflows/refresh-docs.yml`: + +```yaml +name: Refresh Sugar Docs + +on: + schedule: + # Runs at 2 AM UTC daily + - cron: '0 2 * * *' + workflow_dispatch: # Allow manual trigger + +jobs: + refresh-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Fetch and update docs + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: python scripts/fetch_sugar_docs.py + + - name: Commit and push updated docs + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add docs/ + git commit -m "chore: update Sugar documentation" + git push +``` + +### Document Format + +Each fetched document includes: +1. **Header**: Source URL and timestamp in format `# Fetched from [url] on [timestamp]` +2. **Content**: Converted from markdown to plain text + - Markdown headers (`#`, `##`, etc.) converted to plain text + - HTML tags removed + - Excessive whitespace cleaned up + - Maintains readability for RAG retrieval + +### Troubleshooting + +#### Rate Limiting Issues + +Without GitHub authentication, you're limited to 60 requests/hour. With authentication: +- Classic tokens: 5000 requests/hour +- App tokens: Higher limits depending on configuration + +If you see rate limit errors, use GitHub authentication as shown above. + +#### Mixed Authentication Methods + +The system supports multiple authentication methods in order of preference: +1. **X-API-Key Header**: API key-based access +2. **Admin Cookie**: OAuth session with admin permissions +3. 
**`GITHUB_TOKEN` (server-side only)**: Not a way to call the endpoint; this environment variable only authenticates Sugar-AI's own requests to GitHub + +#### Rebuild Issues + +If the vectorstore rebuild fails, check: +1. Document files exist in the `docs/` directory +2. Documents are readable and contain valid text +3. Sufficient system memory for embedding generation +4. Check logs: `tail -f sugar_ai.log` + ## Using the Streamlit App Sugar-AI also provides a Streamlit-based interface for quick interactions and visualizations. diff --git a/app/ai.py b/app/ai.py index 9f07554..f04c3ec 100644 --- a/app/ai.py +++ b/app/ai.py @@ -141,10 +141,18 @@ def setup_vectorstore(self, file_paths: List[str]) -> Optional[FAISS]: if file_path.endswith(".pdf"): loader = PyMuPDFLoader(file_path) else: - loader = TextLoader(file_path) + loader = TextLoader(file_path, encoding="utf-8") documents = loader.load() all_documents.extend(documents) + # Filter out documents with minimal content (less than 50 characters) + # This removes placeholder, stub, or empty documents that don't add value to RAG + # More lenient for PDFs which may have sparse content on some pages + all_documents = [doc for doc in all_documents if len(doc.page_content.strip()) > 50] + + if not all_documents: + raise ValueError("No valid documents found after filtering. 
Check that document files contain sufficient content.") + embeddings = HuggingFaceEmbeddings( model_name="sentence-transformers/all-MiniLM-L6-v2" ) diff --git a/app/routes/api.py b/app/routes/api.py index f2d8c27..d3c98b7 100644 --- a/app/routes/api.py +++ b/app/routes/api.py @@ -10,10 +10,16 @@ import json from datetime import datetime from typing import Dict, Optional, List +import sys from app.database import get_db, APIKey from app.ai import RAGAgent, extract_answer_from_output from app.config import settings +from app.auth import get_current_user + +# Import document fetching module +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +from scripts.fetch_sugar_docs import fetch_all_docs # Pydantic models for chat completions class ChatMessage(BaseModel): @@ -326,3 +332,67 @@ async def change_model( except Exception as e: logger.error(f"Error changing model to {model} by {user_info['name']}: {str(e)}") raise HTTPException(status_code=500, detail=f"Error changing model: {str(e)}") + +@router.post("/refresh-docs") +async def refresh_docs( + user_data: tuple = Depends(get_current_user), + db: Session = Depends(get_db), + request: Request = None +): + """ + Refresh documentation by fetching latest docs from GitHub and rebuilding vectorstore. + Requires admin permission (can_change_model: true). + """ + # Extract user from tuple (user, authenticated) + user, authenticated = user_data + client_ip = request.client.host if request else "unknown" + + # Check authentication + if not authenticated or not user or not user.can_change_model: + logger.warning(f"Unauthorized refresh-docs attempt from {client_ip}") + raise HTTPException(status_code=403, detail="Unauthorized. 
Admin permission required.") + + logger.info(f"REQUEST - /refresh-docs - User: {user.name} - IP: {client_ip}") + + try: + timestamp = datetime.now().isoformat() + + # Get GitHub token from environment (optional) + github_token = os.getenv("GITHUB_TOKEN", None) + + # Fetch all documents + logger.info("Fetching Sugar documentation from GitHub...") + fetch_results = fetch_all_docs(github_token=github_token) + + if not fetch_results["success"]: + error_msg = f"Failed to fetch some documents: {', '.join(fetch_results['errors'])}" + logger.error(f"Error refreshing docs - User: {user.name} - {error_msg}") + raise HTTPException(status_code=500, detail=error_msg) + + # Rebuild vectorstore with all docs + logger.info("Rebuilding vectorstore with fetched documents...") + doc_paths = settings.DOC_PATHS + + # Add newly fetched docs to the list if they're not already there + docs_dir = "docs" + fetched_files = [os.path.join(docs_dir, doc) for doc in fetch_results["fetched_docs"]] + all_docs = list(set(doc_paths + fetched_files)) + + # Rebuild vectorstore + agent.setup_vectorstore(all_docs) + + logger.info(f"SUCCESS - /refresh-docs - User: {user.name} - Fetched {len(fetch_results['fetched_docs'])} docs") + + return { + "status": "success", + "docs_refreshed": fetch_results["fetched_docs"], + "vectorstore_rebuilt": True, + "timestamp": timestamp, + "total_docs_count": len(all_docs) + } + + except HTTPException: + raise + except Exception as e: + logger.error(f"ERROR - /refresh-docs - User: {user.name} - Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error refreshing documentation: {str(e)}") diff --git a/docs/sugar-contributing.txt b/docs/sugar-contributing.txt new file mode 100644 index 0000000..50fa152 --- /dev/null +++ b/docs/sugar-contributing.txt @@ -0,0 +1,444 @@ +# Fetched from https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/contributing.md on 2026-03-22T01:41:26.535286 + +Contributing + +This is our guide to contributing code. 
In brief, you will: + +* select a Sugar Labs software product, i.e. Sugarizer, Sugar, Music + Blocks, or one of the hundreds of learning activities, +* use the software, so that you know from direct experience what to + change, +* find a problem that interests you, + * make a pull request, if you can fix it, see [Workflow](#workflow), + * add an issue, but only if you cannot or will not fix the problem, +* look at other issues and see if you can help, +* review other pull requests, see [Guide for Reviewers](#guide-for-reviewers) + +Make your own decision on what to do. + +Do not ask permission to work. + +Modifying Activities + +Most activity repositories can be found in our [GitHub `sugarlabs` +organization](https://github.com/sugarlabs). + +A few activity repositories are somewhere else; to find them read the +`activity/activity.info` file, check the metadata on the +[activities.sugarlabs.org app +store](https://activities.sugarlabs.org/), or the [Activity page on +wiki.sugarlabs.org](https://wiki.sugarlabs.org/go/Activity), or our +deprecated [gitorious instance](https://git.sugarlabs.org/). + +For new activities, see [Write your own Sugar desktop +activity](desktop-activity.md), or [Write your own Sugar web +activity](web-activity.md), then make a new repository in your +GitHub account, put the source code in it, then ask the [systems@ +list](https://lists.sugarlabs.org/listinfo/systems) to move it to the +GitHub `sugarlabs` organization. 
+ +Checklist - everyone + +* [ ] run the activity and confirm that you understand what it is for, what it does, and how a child will use it, + +* [ ] make a fork and clone it, + +* [ ] check if what you want to change is available already in any other branches or forks, + +* [ ] make and [test](#testing) your changes, + +* [ ] if your changes add a new feature or will affect users; update the NEWS file, the README.md file, and the help-activity, + +* [ ] if there is a `po/*.pot` file, and your changes affect translated strings; regenerate using `python setup.py genpot`, + +* [ ] make a branch, one or more commits, and a pull request, see [Workflow](#workflow) below. + +After modifying an activity, a new release will be needed. Some activities have no maintainer, so you may need to be the maintainer for a short time. + +Checklist - maintainer + +* [ ] for Python 2 branches, check version of latest bundle release in + [activities.sugarlabs.org](https://activities.sugarlabs.org/), + +* [ ] check version of latest tarball release in [download.sugarlabs.org/sources/sucrose/fructose/](https://download.sugarlabs.org/sources/sucrose/fructose/) or [download.sugarlabs.org/sources/honey/](https://download.sugarlabs.org/sources/honey/), + +* [ ] check for a release version git tag, e.g. v34, + +* [ ] correlate with `activity_version` metadata in `activity/activity.info`, + +* [ ] look for commits _after_ any of these, in either; + + * [ ] master branch of repository at sugarlabs, + * [ ] any other branches, + * [ ] any other forks, + * [ ] orphaned repositories with the same `bundle_id` value, using GitHub or Google Search, + * [ ] deprecated repositories at git.sugarlabs.org, + +* [ ] review and merge all pull requests, + +* [ ] apply all desired commits, making pull requests if review is needed, + +* [ ] apply any [translate.sugarlabs.org](https://translate.sugarlabs.org) changes, e.g. 
using a [download script](https://github.com/sugarlabs/sugar-tools/blob/master/activity-translations), + +* [ ] if there is a `po/*.pot` file, regenerate using `python setup.py genpot`, review the changes, and commit, + +* [ ] If the activity is on [weblate](https://weblate.sugarlabs.org), check to make sure that the changes on weblate are also on the repo, +as weblate automatically pushes changes; you can ask for the admin in our matrix channel if you notice that weblate changes aren't +reflected on the repo. + +* [ ] update the README.md file if necessary, + +* [ ] write release notes for the NEWS file, change the `activity_version` metadata in `activity/activity.info`, commit, and `git tag` the version, + +* [ ] update the activity documentation in the help-activity repository, + +* [ ] for activities that include a tarball release, or where Fedora or Debian packages may be made, create a tarball using `python setup.py dist_source`, and upload tarball to download.sugarlabs.org using shell account, + +* [ ] create bundle using `python setup.py dist_xo`, and test that it + can be installed by Browse, + +* [ ] for Python 2 branches only, upload to activities.sugarlabs.org + using developer account, + +* [ ] rebase any other maintained branches or pull requests, such as + those for past or future versions of Python, or past releases of + Fedora, Ubuntu or libraries. + +* [ ] for Python 3 Activities, copy the bundle to `~activities-v4/bundles`, + remove any other old versions of the bundle from `~activities-v4/bundles`, and trigger + a rebuild of the static site using `rebuild-aslo` command, verify if your bundle is + visible on https://v4.activities.sugarlabs.org . For more information, it is + recommended to take a look at the [Activities4 wiki page](https://wiki.sugarlabs.org/go/Service/activities4) + +Modifying Sugar + +Sugar repositories can be found in our [GitHub `sugarlabs` +organization](https://github.com/sugarlabs). 
Sugar desktop +environment repositories are: + +* https://github.com/sugarlabs/sugar (the desktop shell); +* https://github.com/sugarlabs/sugar-artwork (images, icons, themes); +* https://github.com/sugarlabs/sugar-toolkit-gtk4 (graphical widget library); and, +* https://github.com/sugarlabs/sugar-datastore (journal backend). + +Workflow + +We use a pull-request workflow, see [GitHub's help on pull-request](https://help.github.com/articles/using-pull-requests). In brief, you will: + +* find and fork the source repository, see [Forking](#forking) below, +* on your computer, clone your fork repository, see [Cloning](#cloning), +* commit your changes in a new branch, see [Branching](#branching) and [Making Commits](#making-commits), +* push your branch and submit a pull-request for it, and; +* respond to review until your pull-request is merged or the problem is solved in another way. + +Forking + +You must first fork a repository on GitHub. +This step is needed only once for each repository. +See [complete help in GitHub](https://help.github.com/articles/fork-a-repo). + +Cloning + +You must clone your fork. +This step is needed only once for each repository. +Using [sugar](https://github.com/sugarlabs/sugar) as example; + +``` +git clone git@github.com:YOUR-NAME/sugar.git +cd sugar +git remote add upstream https://github.com/sugarlabs/sugar.git +git fetch upstream +``` + +Branching + +Create a branch per set of changes; e.g. to fix a problem or add a feature; + +``` +git checkout -b BRANCH-NAME +``` + +Your BRANCH-NAME can be anything, other than master. The scope is your forked repository. The branch name will be shown on pull-requests. + +Making commits + +Change files, and commit. Commit messages are kept by git, and are used later when problems are being solved. When writing a commit message; + +1. start with a one line summary of the change; +2. leave a blank line after the summary; +3. explain the problem that is solved, unless the summary makes it obvious; +4. 
when the problem was introduced by a previous commit, mention the hash; +5. when the problem is in an issue or ticket, add "Fixes #1234"; +6. avoid mentioning GitHub or other pull-requests, as these are not kept in git; +7. avoid mentioning any contest tasks or mentors; use pull-request comments instead; and +8. use imperative mood, like "add foo", or "port to bar"; (if English is not your first language, see [imperative mood](https://en.wikipedia.org/wiki/Imperative_mood), [git documentation](https://git.kernel.org/pub/scm/git/git.git/tree/Documentation/SubmittingPatches#n133) and [blog post by Dan Clarke](https://www.danclarke.com/git-tense)). + +See additional guidance [How to Write a Git Commit Message](https://chris.beams.io/git-commit) from 2014. + +Make one or more commits. + +Pushing + +Push the branch to your repository; + +``` +git push origin BRANCH-NAME +``` + +Create a pull-request + +Send a pull-request for your branch. +Navigate to your repository page in GitHub, switch to the branch you made, and then press the **Pull Request** button. + +When writing a pull-request message; + +1. if there is only one commit, begin with the GitHub default of the commit message, otherwise write a summary of the series of commits; +2. link to any relevant pull-requests, issues, or tickets; and +3. link to any contest tasks, and name your @mentors to subscribe them. + +A review will happen in the pull-request, and a reviewer will either; + +1. merge, squash, or rebase your commits; +2. merge your commits with their own changes; +3. ask you to make changes; or +4. close and reject your pull-request giving reasons. + +When they ask you for changes, you may have to change both files, commits or commit messages. + +When squashing commits to different files, use interactive rebase. + +``` +git rebase -i master +``` + +After resolving any conflicts, push the changes to the same branch; + +``` +git push --force origin +``` + +Then respond on the pull-request. 
+ +Keep your pull-request up to date + +When there have been upstream commits while your pull-request was open, you should rebase your pull-request; + +``` +git pull --rebase upstream +``` + +Then push the changes to the same branch; + +``` +git push --force origin +``` + +The pull-request will be updated. + +Keep your fork up to date + +When there have been upstream commits since your fork was made, you should bring these into your fork: + +``` +git checkout master +git pull upstream +git checkout BRANCH-NAME +``` + +Review + +We encourage [testing](#testing) before merging a pull-request. + +So instead of merging directly with the "merge" button on GitHub, we may do a local merge, then test, then push. + +See [GitHub help on merging a pull-request](https://help.github.com/articles/merging-a-pull-request). + +The GitHub page for the pull-request will provide you the right commands to do the local merge, similar to the following. + +Get the changes from that branch to a new local branch: + +``` +git checkout -b SOME-USER-topic1 master +git pull https://github.com/SOME-USER/sugar.git topic1 +``` + +[Test](#testing)! If everything is fine, merge: + +``` +git checkout master +git rebase SOME-USER-topic1 +git push origin master +``` + +Close Issue + +Once your pull-request is merged, you should close any issue or ticket. GitHub issues named as "Fixes" in a commit message may be automatically closed. + +Be sure to thank everyone who helped you out along the way. 
+ +Testing +------- + +When testing activities; + +* activity must start, + +* activity must refresh entire display when focus is restored, + +* activity must behave predictably (except where randomness is designed), + +* activity must save data to journal, + +* activity must restore the saved data from journal (click on the journal entry), + +* other activities must be able to use saved data, if it is declared with the relevant content type, + +* every coded feature should either work properly, or be removed if it cannot be fixed, + +* collaboration support, if present, must function properly between two or more systems, + +* activity should not consume all available battery power (e.g. pygame clock rate too high), + +* activity should not contain any security vulnerabilities, + +* activity should not reveal personal information. + +AI guidelines for Sugar Labs contributions + +Generative AI tools continue to evolve rapidly. They can be helpful +in coding and they are being used to contribute code to our software +repositories. As with using any tool, the code generated with AI is +the responsibility of the contributor. + +We value good code, concise accurate documentation, and avoiding +unneeded code churn. Discretion, good judgment, and critical thinking +are the foundation of all good contributions, regardless of the tools +used in their creation. + +You may use any tools that help you understand our software and write +good code, including AI tools. However, you must always understand +and be able to explain the changes you're proposing to make, whether +or not you used an LLM as part of your process to produce them. The +answer to “Why is X an improvement?” should never be “I'm not +sure. 
The AI did it.” + +Acceptable uses + +Some of the acceptable uses of generative AI include: + +* Assistance with writing comments, especially in a non-native language + +* Gaining understanding of existing code + +* Supplementing contributor knowledge for code, tests, and documentation + +Recommendations + +It is recommended that you: + +* Don't skip becoming familiar with the part of the codebase you're +working on. This will let you write better prompts and validate their +output if you use an LLM. + +* Split up your changes into coherent commits, even if an LLM generates +them all in one go. + +* Consider whether the change is necessary. + +* Make minimal, focused changes. + +* Follow existing coding style and patterns. + +* Write tests that exercise the change. + +Further considerations + +* Maintainers may close issues and PRs that are not useful or +productive, including those that are fully generated by AI. If a +contributor repeatedly opens unproductive issues or PRs, they may be +blocked. + +* When writing a pull request description, do not include anything +that's obvious from looking at your changes directly (e.g., files +changed, functions updated, etc.). Instead, focus on the why behind +your changes. Don't ask an LLM to generate a PR description on your +behalf based on your code changes, as it will simply regurgitate the +information that's already there. + +* Similarly, when responding to a pull request comment, explain your +reasoning. Don't prompt an LLM to re-describe what can already be seen +from the code. + +* Verify that everything you write is accurate, whether or not an LLM +generated any part of it. Maintainers will be unable to review your +contributions if you misrepresent your work (e.g., misdescribing your +code changes, their effect, or your testing process). + +* Clarity and succinctness are much more important than perfect grammar, +so you shouldn't feel obliged to pass your writing through an LLM. 
If +you do ask an LLM to clean up your writing style, be sure it does not +make it longer in the process. Demand succinctness in your prompt. + +Guide for Reviewers +------------------- + +Goals + +Goals for review are to; + +* detect trivial mistakes, + +* maintain consistent and good code quality, + +* reproduce [test](#testing) results, (especially for critical repositories), + +* maintain a useful git commit history for use by git bisect, and developers who read it, + +* maintain other records, such as issues, tickets, and documentation, + +* not waste the time of the contributor, by doing anything trivial that otherwise the contributor might have to do. + +Checklist - review + +* [ ] does the change have consensus of the community, see also [code of conduct](CODE_OF_CONDUCT.md) (if a reviewer is in doubt, seek opinions by @mentioning people), + +* [ ] does the commit message explain the summary, problem, and solution, so that it can be used in future analysis, see also [making commits](#making-commits) (if a reviewer can fix it by squash or manual rebase, do so), + +* [ ] does the commit message reference any issue, bugs.sugarlabs.org ticket number, or downstream ticket numbers, (if a reviewer can fix it by squash or manual rebase, do so), + +* [ ] are the number of commits excessive for future analysis, (a reviewer may squash or rebase if necessary), + +* [ ] is the changed code consistent in style with the existing code, see also [coding standards](desktop-activity.md#coding-standards), (on the other hand, expect flake8 changes to be in separate commits), + +* [ ] for critical repositories, does the change work properly on our latest version of Sugar on either Fedora, Debian, or Ubuntu. 
+ +Critical repositories + +- sugar, sugar-toolkit, sugar-toolkit-gtk3, sugar-artwork, sugar-datastore, gst-plugins-espeak, + +- each of the [Fructose](https://wiki.sugarlabs.org/go/Development_Team/Release/Modules#Fructose) activity set repositories, + +Frequently Asked Questions + +I've used the GitHub editor, how can I rebase or amend commits? + +Make a local clone of your GitHub repository, use `git commit --amend` or the other advanced CLI features, then `git push` back to GitHub. + +Error 403 on `git push` + +Most likely you have cloned someone else's repository, and you should +instead fork their repository, clone your own repository, make your +changes, then push. See [Getting error 403 while submitting +PR](http://lists.sugarlabs.org/archive/sugar-devel/2017-March/053926.html) +and [D. Joe's +reply](http://lists.sugarlabs.org/archive/sugar-devel/2017-March/053929.html). + +My fork is out of date. How do I sync it with upstream? + +See GitHub’s guide on syncing a fork: +https://docs.github.com/en/get-started/quickstart/fork-a-repo#syncing-a-fork + +Where can I ask questions or get help from the community? + +Join the Sugar Labs community chat or mailing lists: +https://matrix.to/#/#sugar:matrix.org \ No newline at end of file diff --git a/docs/sugar-desktop-activity.txt b/docs/sugar-desktop-activity.txt new file mode 100644 index 0000000..66ae6b4 --- /dev/null +++ b/docs/sugar-desktop-activity.txt @@ -0,0 +1,247 @@ +# Fetched from https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/desktop-activity.md on 2026-03-22T01:41:26.535286 + +Write your own Sugar desktop activity +===================================== + +Most Sugar desktop activities are written in Python, using our [Sugar +Toolkit](https://github.com/sugarlabs/sugar-toolkit-gtk3) libraries. + +Some Sugar desktop activities are written in languages other than +Python, such as Smalltalk, C, and JavaScript. 
For new activities to +run on both Sugar desktop and Sugarizer, please consider [Write your +own Sugar web activity](web-activity.md). + +This page shows how to develop an activity using Python 3 with Sugar Toolkit. + +Setting up a development environment + +[Setup a development +environment](https://github.com/sugarlabs/sugar/blob/master/docs/development-environment.md), for testing your activity and +releasing it for distribution. + +Create a new activity from a template + +Locate the activity directories. They may include: + +* `~/Activities` for native Sugar desktop, and packaged Sugar desktop on Fedora, Debian or Ubuntu; +* `/usr/share/sugar/activities` for packaged Sugar desktop; and, +* `~/sugar-build/activities` for *sugar-build* only. + +Each installed activity is in a directory under the activity +directories. This is where you will create your new activity. + +Clone the Hello World activity from +[hello-world on GitHub](https://github.com/sugarlabs/hello-world-fork): +``` +git clone https://github.com/sugarlabs/hello-world-fork.git Name.activity +``` +Use the `.activity` suffix in the directory name of an activity, +because that's the way an activity bundle is released. + +Customize + +Your clone of the Hello World activity contains a file, +`activity/activity.info`: +``` +[Activity] +name = HelloWorld +activity_version = 1 +bundle_id = org.sugarlabs.HelloWorld +exec = sugar-activity3 activity.HelloWorldActivity +icon = activity-helloworld +licence = GPLv3+ +repository = https://github.com/sugarlabs/hello-world-fork.git +``` +You must set a new `name` and a unique `bundle_id` for your activity. + +Avoid punctuation in your `bundle_id`, e.g., +`org.sugarlabs.my-activity-name` is not valid. Instead, use +CamelCase, e.g., `org.sugarlabs.MyActivity`. 
+ +You should change the Activity class in your `activity.py` file, e.g., from: +``` +class HelloWorldActivity(activity.Activity): +``` +to: +``` +class MyActivity(activity.Activity): +``` +You must change the `exec` field as well, e.g., from: +``` +exec = sugar-activity3 activity.HelloWorldActivity +``` +to: +``` +exec = sugar-activity3 activity.MyActivity +``` +You should set the repository field to the URL of the git repository +of your project. + +And we recommend that you use a GPLv3+ license. + +Your `activity.info` file will look something like: +``` +[Activity] +name = MyActivity +activity_version = 1 +bundle_id = org.sugarlabs.MyActivity +exec = sugar-activity3 activity.MyActivity +icon = activity-helloworld +licence = GPLv3+ +repository = https://github.com/MyGitHubAccount/MyActivityRepo.git +``` +To read more about the `activity.info` file, see [Activity +Bundles](https://wiki.sugarlabs.org/go/Development_Team/Almanac/Activity_Bundles) +on our Wiki. + +You must make your activity icon unique in the Sugar interface by +making a new one, or borrowing from another icon and making changes. +Ask for help from the community if you don't feel comfortable with +graphic design. + +Here is `activity/activity-helloworld.svg`; + +![Activity Icon](images/activity-helloworld.svg "Activity icon") + +You should rename this file and change `icon` in the `activity.info` +file. + +Your activity icon must follow the guidelines as described in +[The Sugar Interface: +Icons](https://wiki.sugarlabs.org/go/Human_Interface_Guidelines/The_Sugar_Interface/Icons) on our Wiki. + +There is a helper script, [Sugar +Iconify](sugar-iconify.md) that will help +you create Sugar-compliant icons. + +Of course, the interesting changes will be the ones you make to the +activity itself. Below you will find links to some resources +on Sugar Activity development, but perhaps the +best way to get started is to modify an existing activity that has +features similar to the one you want to create. 
+ +Running your activity + +Launch Sugar and your new activity should be immediately available, +although since it has not yet been selected as a favorite, it will +not appear by default on the Sugar Home View (F3). You need to either; + +* type the name of your activity into the search entry and press enter; or, + +* select the List View (ctrl+2) to see your activity, and click on it. + +If all goes well, your activity will launch. + +There are many opportunities to make mistakes. Don't get discouraged, +as debugging is a great way to learn. One useful tool is the Log +Activity, which will show you the log files of the operating system, +Sugar and activities. Alternatively, you can look at the log files +from the command line. + +Log files are usually in the directory `~/.sugar/default/logs`. + +Log files for *sugar-build* are in the directory `~/sugar-build/home/dotsugar/default/logs`. + +Log files are named using the `bundle_id`. + +You may also test interactively by starting Terminal, then `cd` to the activity directory, and type: +``` +sugar-activity . +``` +if your activity uses python2. If it uses python3, type: + +``` +sugar-activity3 . +``` +File structure + +All activities should follow this file structure: +``` +MyActivity.activity/ +|-- activity/ +| |-- activity.info +| `-- activity-icon.svg +|-- activity.py +`-- setup.py +``` +* `activity/` contains information about your activity, including the + `name`, `bundle_id`, and the `icon`. + +* `activity.py` contains an instance of the activity class, which is + run when your activity is launched. + +* `setup.py` lets you install your activity or make an installable + bundle with it. + +Translation + +Sugar serves a global audience, so it is important to enable your +activity for internationalization and localization. A [guide to best +practices](https://wiki.sugarlabs.org/go/Translation_Team/i18n_Best_Practices) +is on our Wiki. 
+ +Revision control your code + +For development you can initialize the repository as a git +repository. This will help you to track your changes. First use [git +init](https://www.kernel.org/pub/software/scm/git/docs/git-init.html) +to initialize the repository: +``` +git init +``` +With [git +status](https://www.kernel.org/pub/software/scm/git/docs/git-status.html) +you can show the available files in the folder they are still +untracked. Now add all the files in the directory and commit those +changes, you can use git status again to see the current state: +``` +git add . +git commit -a -m 'Initial import' +git status +``` +We recommend that you use [github](http://github.com) to host your +activity. + +Check Python Coverage of your activity + +You can check the Python Coverage of your activity by following [this](python-coverage-guide.md) guide. + +Ready to release + +Once your activity is working, you can ask to have +your activity repository hosted under the [Sugar Labs github +organization](http://github.com/sugarlabs). + +Make an XO bundle. +``` +python setup.py dist_xo +``` +And if it works with Python 2 then upload it to the Sugar Activity +Library . +After that, users of Sugar can download and install your activity. + +For further releases, you must update the activity_version in +`activity/activity.info`. + +More details +============ + +* [Sugar Toolkit Documentation](https://developer.sugarlabs.org/sugar3/). + +* [Python GTK+ 3 Tutorial](http://python-gtk-3-tutorial.readthedocs.io/en/latest/). + +* [Make Your Own Sugar Activities](https://flossmanuals.net/make-your-own-sugar-activities/), a book by James Simmons. + +Coding standards + +We currently use [Python 3](https://docs.python.org/3/) for +the Sugar Toolkit and Sugar activity development. + +We recommend [flake8](https://pypi.python.org/pypi/flake8) as a +wrapper around the [pep8](https://www.python.org/dev/peps/pep-0008/) +and [pyflakes](https://pypi.python.org/pypi/pyflakes) code checkers. 
+ +To check for flake8 warnings in the current directory, run this command +``` +python3 -m flake8 *.py +``` \ No newline at end of file diff --git a/docs/sugar-web-activity.txt b/docs/sugar-web-activity.txt new file mode 100644 index 0000000..cc3cd22 --- /dev/null +++ b/docs/sugar-web-activity.txt @@ -0,0 +1,288 @@ +# Fetched from https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/web-activity.md on 2026-03-22T01:41:26.535286 + +Write your own Sugar web activity +================================= + +Choose your development environment + +You've got three choices to develop your own web activity for Sugar: + +- install packaged Sugar on Fedora, Debian, or Ubuntu; or, +- use Sugarizer. + +See [Setup a development environment](https://github.com/sugarlabs/sugar/blob/master/docs/development-environment.md) for more detail. + +*Sugarizer* simulates the Sugar environment in a browser. So you need +only a browser to start developing. It's the better choice if you've +no time or knowledge to learn how to install or build Sugar desktop +on a GNU Linux distribution but you're not in a Sugar desktop +environment, so your activity may only work in Sugarizer. + +Create the activity from the template + +On *packaged sugar*, after you have built the development +environment, clone the [Sugarizer repository](https://github.com/llaske/Sugarizer). +Install volo through npm. + + npm install -g volo + +Create an activity based on the default template + + cd sugarizer/activities + volo create MyActivity.activity ./ActivityTemplate + cd MyActivity.activity + +On *Sugarizer*, after you've cloned - or copied - the [Sugarizer +repository](https://github.com/llaske/Sugarizer), copy all content of +`activities/ActivityTemplate` directory in a new directory +`activities/MyActivity.activity`. + +Customize + +Choose a name for your activity. Write it in the activity name and +bundle-id in `activity/activity.info` of the new directory. 
+ +![activity.info](images/activity-info.png "activity.info") + +And also in the title tag of `index.html`. + +![index.html](images/activity-html.png "index.html") + +On *packaged sugar*, install the activity for development + + python setup.py dev + +On *Sugarizer*, update the file `activities.json` of the Sugarizer directory: add a new line for your activity. Update id, name and directory values on this new line. + +![Sugarizer settings](images/sugarizer-json.png "Sugarizer settings") + +Now you should have a basic activity running! + +![Activity template](images/activity-template.png "Activity template") + +File structure + +In your new activity, you will find the following file structure: + + MyActivity.activity/ + |-- activity/ + | |-- activity.info + | `-- activity-icon.svg + |-- index.html + |-- css/ + | `-- activity.css + |-- js/ + | |-- activity.js + | `-- loader.js + |-- lib/ + |-- package.json + `-- setup.py + +* `activity/` contains information about your activity, including the + name, ID, and the icon. + +* `index.html` is where the elements that compose your activity are + defined. The template comes with a toolbar and a canvas where you + can place your content. + +* `js/activity.js` is where the logic of your activity lives. + +* `css/activity.css` is where you add the styling of your activity. + +Those are the files you'll modify in most cases. The others are: + +* `js/loader.js` configures the libraries paths and loads your + `js/activity.js` . You can add [non-AMD libraries](#non-amd%20libs) here. + +* `lib/` contains the libraries + +* `package.json` contains information about the libraries the activity + depends + +* `setup.py` lets you install your activity or make an installable + bundle with it + +Now you are ready to go ahead and develop your activity in the html, +js and css directories. + +Revision control your code + +For development you can initialize the repository as a git +repository. This will help you to track your changes. 
First use [git +init](https://www.kernel.org/pub/software/scm/git/docs/git-init.html) +to initialize the repository: + + git init + +With [git +status](https://www.kernel.org/pub/software/scm/git/docs/git-status.html) +you can show the available files in the folder they are still +untracked. Now add all the files in the directory besides the lib +folder and commit those changes, you can use git status again to see +the current state: + + git add . + git commit -a -m 'Initial import' + git status + +First steps + +Adding a button to the toolbar + +This simple example will show you how web activities are structured as +bits of HTML, CSS and JavaScript. + +You will need a SVG graphic for the button. Or you can use one from +the Sugar icon set at `lib/sugar-web/graphics/icons/`. For this +example, let's say you have one custom icon called `my-button.svg`. +Create a directory `icons/` inside your activity and place the SVG +file inside. Then do the following steps. + +In `index.html`, add a new <button> element inside the toolbar: + + + +In `css/activity.css`, define the button style: + + #main-toolbar #my-button { + background-image: url(../icons/my-button.svg); + } + +In `js/activity.js`, add a callback for the button: + + var myButton = document.getElementById("my-button"); + myButton.onclick = function () { + console.log("You clicked me!"); + } + +Adding HTML content dynamically + +Soon you will find that adding content to the HTML as we did with the +toolbar button in the previous section, is very limited. You'll want +to add HTML elements on the fly, as the user interacts with the +activity, or as the data structures of your activity logic change. +There are several options to archive this. Most of the time you'll +end up using a mix of them, so it is important to know them all. + +First, it is possible to create HTML elements and append them to other +HTML elements using JavaScript. This is called "manipulating the +DOM". 
+ +For example, to create a new div with class 'my-div', and append it to +the canvas div, you can do: + + myElem = document.createElement('div'); + myElem.className = "my-div"; + var canvas = document.getElementById("canvas"); + canvas.appendChild(myElem); + +But it is a pain to do that for large HTML structures. Writing HTML +directly is much better: + + var canvas = document.getElementById("canvas"); + canvas.innerHTML += + '' + + 'Tom' + + 'Chris' + + 'Donald' + + ''; + +Nice, that saves us many JavaScript lines. But what if the HTML +depends on your data? Let's say you have an array of names and you +want one <li> per name, as in the previous example. You have +two options: 1. go back to use the JavaScript methods for DOM +manipulation, or 2. use a template system. + +There are many template systems out there, and you can use whatever +you like. Let's try [mustache](http://mustache.github.io/) here. + +Add mustache to your activity: + + volo add mustache + +Import mustache in your `js/activity.js`: + + var mustache = require("mustache"); + +Use it: + + var template = + '' + + '{{#names}}' + + '{{ name }}' + + '{{/names}}' + + ''; + + var data = {names: [{name: "Laura"}, {name: "Joao"}, + {name: "Willy"}, {name: "Sandra"}]}; + + var containerElem = document.getElementById("container"); + containerElem.innerHTML = mustache.render(template, data); + +Debugging + +If you want to inspect the code, you can press ctrl+shift+I while your +Activity is running. + +The inspector is a very useful tool for many things. For example, you +can edit the activity CSS or HTML, and interactively see how it is +affected. Or you can execute JavaScript commands in the console. + +![Activity inspector](images/activity-inspector.png "Activity inspector") + +It has also more advanced tools for JavaScript debugging. They are +nicely documented here: +. 
+ +Keeping Sugar libraries up to date + +The activity depends on the +[sugar-web](http://github.com/sugarlabs/sugar-web) library +that provides the Sugar API and the Sugar look & feel. + +This means that if there are changes to the library you have to update +your local copy. You can do this with running +the following command inside the activity directory: + + volo add -f + +Using other JavaScript libraries + +AMD-ready + +You can easily add AMD-ready libraries with volo. For example, to add +[RaphaelJS](http://raphaeljs.com/): + + $ volo add DmitryBaranovskiy/raphael + Downloading: https://codeload.github.com/DmitryBaranovskiy/raphael/legacy.zip/v2.1.2 + Installed github:DmitryBaranovskiy/raphael/v2.1.2 at lib/raphael.js + AMD dependency name: raphael + +Then in `js/activity.js` you can use it: + + var raphael = require("raphael"); + +non-AMD libs + +Please, refer to +RequiresJS [shim section](http://requirejs.org/docs/api.html#config-shim), +then you can add your shim section in `js/loader.js` + +Ready to release + +Before your first release, you should: + +* make your activity unique in the Sugar interface by changing your + activity icon activity/activity-icon.svg . Or if you don't have + graphics skills, you can ask in the community if someone can do it. + +After that, if the activity works with Sugar 0.112 or earlier, on +*packaged-sugar* you may make a bundle and upload it to the Sugar +Activity Library . + + python setup.py dist_xo + +With **Sugarizer**, you can directly publish the XO bundle. So, just zip the content of your `activities/MyActivity.activity` directory and rename the `.zip` file to a `.xo` file. + +For further releases, you should update the activity_version in +`activity/activity.info`. 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 6d4e637..7c3bd12 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,4 @@ httpx
 pydantic-settings
 sentence-transformers
 python-dotenv
+requests
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..ab42b9b
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1,3 @@
+"""
+Scripts package for Sugar-AI operational tasks.
+"""
diff --git a/scripts/fetch_sugar_docs.py b/scripts/fetch_sugar_docs.py
new file mode 100644
index 0000000..e026abd
--- /dev/null
+++ b/scripts/fetch_sugar_docs.py
@@ -0,0 +1,359 @@
+"""
+Dynamic document fetching system for Sugar-AI.
+Fetches Sugar documentation from GitHub and converts to plain text for RAG.
+"""
+
+import os
+import requests
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional
+import logging
+
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Documentation sources to fetch
+DOCS_TO_FETCH = [
+    {
+        "url": "https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/desktop-activity.md",
+        "filename": "sugar-desktop-activity.txt"
+    },
+    {
+        "url": "https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/web-activity.md",
+        "filename": "sugar-web-activity.txt"
+    },
+    {
+        "url": "https://raw.githubusercontent.com/sugarlabs/sugar-docs/master/src/contributing.md",
+        "filename": "sugar-contributing.txt"
+    }
+]
+
+# Opening or closing line of a fenced code block (``` or ~~~).
+_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})')
+# Line that belongs to an indented code block (four spaces or a tab).
+_INDENTED_CODE_RE = re.compile(r'^(?: {4}|\t)')
+# Markup handled in prose: group 1 is an inline code span (kept verbatim),
+# group 2 is the address of an autolink such as <https://example.org>
+# (kept without the brackets); any other <...> is an HTML tag (dropped).
+_PROSE_MARKUP_RE = re.compile(
+    r'(`[^`\n]*`)'
+    r'|<((?:https?|ftp|mailto):[^\s<>]+|[^\s<>@]+@[^\s<>@]+)>'
+    r'|<[^>]+>'
+)
+
+
+def _apply_outside_code(text: str, transform: Callable[[str], str]) -> str:
+    """
+    Apply ``transform`` to the prose of a markdown document only.
+
+    Fenced code blocks and indented code lines are copied through unchanged
+    so code samples keep their ``#`` comments and HTML. Consecutive prose
+    lines are passed to ``transform`` as one chunk, so patterns that span
+    several prose lines still match.
+    """
+    output = []
+    prose = []
+    open_fence = None  # marker of the fenced block we are inside, if any
+
+    def flush_prose():
+        if prose:
+            output.append(transform(''.join(prose)))
+            prose.clear()
+
+    for line in text.splitlines(keepends=True):
+        fence = _FENCE_RE.match(line)
+        if open_fence is not None:
+            output.append(line)
+            # A block ends at a fence of the same character that is at
+            # least as long as the one that opened it.
+            if (fence and fence.group(1)[0] == open_fence[0]
+                    and len(fence.group(1)) >= len(open_fence)):
+                open_fence = None
+        elif fence:
+            flush_prose()
+            open_fence = fence.group(1)
+            output.append(line)
+        elif line.strip() and _INDENTED_CODE_RE.match(line):
+            flush_prose()
+            output.append(line)
+        else:
+            prose.append(line)
+
+    flush_prose()
+    return ''.join(output)
+
+
+def strip_markdown_headers(text: str) -> str:
+    """
+    Convert markdown headers to plain text by removing # symbols.
+    Converts "# Header" to "Header", "## Subheader" to "Subheader", etc.
+
+    Lines inside code blocks are left alone, so comments in shell or
+    Python samples keep their leading ``#``.
+    """
+    def strip_hashes(prose: str) -> str:
+        return re.sub(r'^#+\s+', '', prose, flags=re.MULTILINE)
+
+    return _apply_outside_code(text, strip_hashes)
+
+
+def remove_html_tags(text: str) -> str:
+    """
+    Remove HTML tags from markdown prose.
+
+    Code blocks and inline code spans are kept verbatim so HTML examples
+    survive, and autolinks such as ``<https://example.org>`` are reduced
+    to the bare address instead of being deleted together with the URL.
+    """
+    def strip_tags(prose: str) -> str:
+        return _PROSE_MARKUP_RE.sub(
+            lambda m: m.group(1) or m.group(2) or '', prose
+        )
+
+    return _apply_outside_code(text, strip_tags)
+
+
+def markdown_to_plaintext(markdown_content: str) -> str:
+    """
+    Convert markdown content to clean plain text.
+    Strips markdown headers and removes HTML tags.
+    """
+    # First strip markdown headers
+    text = strip_markdown_headers(markdown_content)
+
+    # Remove HTML tags
+    text = remove_html_tags(text)
+
+    # Clean up excessive whitespace
+    # Replace multiple newlines with double newline
+    text = re.sub(r'\n{3,}', '\n\n', text)
+
+    # Strip leading/trailing whitespace
+    text = text.strip()
+
+    return text
+
+
+def _fetch_failure(error_msg: str) -> Dict[str, Any]:
+    """Log ``error_msg`` and return it in the failed-fetch result shape."""
+    logger.error(error_msg)
+    return {
+        "success": False,
+        "content": None,
+        "error": error_msg
+    }
+
+
+def fetch_document(url: str, github_token: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Fetch a document from a GitHub URL.
+
+    Never raises: every failure is logged and reported in the result.
+
+    Args:
+        url: GitHub raw content URL
+        github_token: Optional GitHub API token for authentication
+
+    Returns:
+        Dict with 'success', 'content', and 'error' keys
+    """
+    try:
+        headers = {}
+        if github_token:
+            headers['Authorization'] = f'token {github_token}'
+
+        logger.info(f"Fetching document from: {url}")
+        response = requests.get(url, headers=headers, timeout=10)
+
+        if response.status_code == 200:
+            logger.info(f"Successfully fetched: {url}")
+            return {
+                "success": True,
+                "content": response.text,
+                "error": None
+            }
+        if response.status_code == 404:
+            return _fetch_failure(f"Document not found (404): {url}")
+        return _fetch_failure(
+            f"Failed to fetch {url}: HTTP {response.status_code}"
+        )
+
+    except requests.Timeout:
+        # Must precede ConnectionError: requests.ConnectTimeout derives from
+        # both, and a timeout is the more useful diagnosis.
+        return _fetch_failure(f"Request timeout fetching {url}")
+    except requests.ConnectionError as e:
+        return _fetch_failure(f"Network error fetching {url}: {str(e)}")
+    except Exception as e:
+        # Last resort so one bad document cannot abort a whole refresh.
+        return _fetch_failure(f"Unexpected error fetching {url}: {str(e)}")
+
+
+def save_document(content: str, filename: str, docs_dir: str = "docs") -> Dict[str, Any]:
+    """
+    Save a document to the docs directory.
+
+    Args:
+        content: Plain text content to save
+        filename: Target filename
+        docs_dir: Target directory (default: 'docs')
+
+    Returns:
+        Dict with 'success', 'filepath', and 'error' keys
+    """
+    try:
+        # Ensure docs directory exists
+        Path(docs_dir).mkdir(parents=True, exist_ok=True)
+
+        filepath = os.path.join(docs_dir, filename)
+
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write(content)
+
+        logger.info(f"Saved document: {filepath}")
+        return {
+            "success": True,
+            "filepath": filepath,
+            "error": None
+        }
+
+    except Exception as e:
+        error_msg = f"Error saving {filename}: {str(e)}"
+        logger.error(error_msg)
+        return {
+            "success": False,
+            "filepath": None,
+            "error": error_msg
+        }
+
+
+def fetch_all_docs(
+    github_token: Optional[str] = None,
+    docs_to_fetch: Optional[List[Dict]] = None,
+    docs_dir: str = "docs",
+) -> Dict[str, Any]:
+    """
+    Fetch all Sugar documentation from GitHub and save as plain text files.
+
+    Args:
+        github_token: Optional GitHub API token for authentication
+        docs_to_fetch: List of docs to fetch (default: DOCS_TO_FETCH)
+        docs_dir: Directory the text files are written to (default: 'docs')
+
+    Returns:
+        Dict containing fetch results and summary
+    """
+    if docs_to_fetch is None:
+        docs_to_fetch = DOCS_TO_FETCH
+
+    timestamp = datetime.now().isoformat()
+    results = {
+        "success": True,
+        "timestamp": timestamp,
+        "fetched_docs": [],
+        "failed_docs": [],
+        "errors": []
+    }
+
+    logger.info(f"Starting document fetch at {timestamp}")
+    logger.info(f"GitHub token present: {bool(github_token)}")
+
+    for doc in docs_to_fetch:
+        url = doc["url"]
+        filename = doc["filename"]
+
+        # Fetch document
+        fetch_result = fetch_document(url, github_token)
+
+        if not fetch_result["success"]:
+            results["failed_docs"].append(filename)
+            results["errors"].append(fetch_result["error"])
+            continue
+
+        # Convert markdown to plain text
+        plain_text = markdown_to_plaintext(fetch_result["content"])
+
+        # Add header with source and timestamp
+        header = f"# Fetched from {url} on {timestamp}\n\n"
+        final_content = header + plain_text
+
+        # Save document
+        save_result = save_document(final_content, filename, docs_dir)
+
+        if save_result["success"]:
+            results["fetched_docs"].append(filename)
+            logger.info(f"Successfully processed: {filename}")
+        else:
+            results["failed_docs"].append(filename)
+            results["errors"].append(save_result.get("error", "Unknown error"))
+
+    # Determine overall success
+    if results["failed_docs"]:
+        results["success"] = False
+
+    return results
+
+
+def main():
+    """
+    Main entry point for standalone script execution.
+    Can be called as: python scripts/fetch_sugar_docs.py
+
+    Returns:
+        Process exit code: 0 when every document was fetched, 1 otherwise
+    """
+    # Get GitHub token from environment (optional)
+    github_token = os.getenv("GITHUB_TOKEN", None)
+
+    if github_token:
+        logger.info("Using GitHub token from environment")
+    else:
+        logger.info("No GitHub token provided. Using unauthenticated requests (subject to rate limits)")
+
+    # Fetch all documents
+    results = fetch_all_docs(github_token=github_token)
+
+    # Print summary
+    total_docs = len(results["fetched_docs"]) + len(results["failed_docs"])
+
+    print("\n" + "="*60)
+    print("SUGAR-AI DOCUMENT FETCH SUMMARY")
+    print("="*60)
+    print(f"Timestamp: {results['timestamp']}")
+    print(f"Total documents attempted: {total_docs}")
+    print(f"Successfully fetched: {len(results['fetched_docs'])}")
+    print(f"Failed: {len(results['failed_docs'])}")
+
+    if results["fetched_docs"]:
+        print("\nFetched documents:")
+        for doc in results["fetched_docs"]:
+            print(f" ✓ {doc}")
+
+    if results["failed_docs"]:
+        print("\nFailed documents:")
+        for i, doc in enumerate(results["failed_docs"]):
+            print(f" ✗ {doc}")
+            if i < len(results["errors"]):
+                print(f" Error: {results['errors'][i]}")
+
+    print("\n" + "="*60)
+
+    # Return summary message
+    if results["success"]:
+        print(f"Fetched {len(results['fetched_docs'])} docs successfully")
+        return 0
+    else:
+        print(f"Fetch completed with {len(results['failed_docs'])} errors")
+        return 1
+
+
+if __name__ == "__main__":
+    # SystemExit sets the exit status without relying on the site-provided
+    # exit() helper, which is absent under "python -S" or in frozen builds.
+    raise SystemExit(main())