diff --git a/.github/workflows/workflow.yml/workflow.yml b/.github/workflows/workflow.yml/workflow.yml new file mode 100644 index 000000000..167ae11cf --- /dev/null +++ b/.github/workflows/workflow.yml/workflow.yml @@ -0,0 +1,7 @@ +steps: + - name: My step + env: + API_KEY: ${{ secrets.APIKEY }} + run: | + # You can use the secret as an environment variable in this step + echo "API key is $API_KEY" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..a7a729c97 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,37 @@ +#simple_server exoframe login https://exoframe.xf.mkrs.link -k /Users/fawaztarar/Documents/makers/key.pem + + + +# Imagine this file as a recipe for setting up a virtual computer. + +# Dockerfiles typically start with a 'base image'. There are loads of these +# and you can find them at hub.docker.com. +# We're going to use a base image for Python veresion 3.11 +FROM python:3.11 + +# This base image contains essentially everything necessary for a 'virtual +# computer'. It has a terminal, certain basic commands, and of course Python. + +# We run a command to install `pipenv` +RUN pip install pipenv + +# We'll need our app's files in the container in order to be able to run them! +# We copy them in from the current directory to the folder `/app` in +# our virtual computer. Reminder `.` means 'the current directory' +COPY . /app + +# We set the working directory for commands from this point on +WORKDIR /app + +# We run `pipenv install` to install our project's dependencies. Since we've +# copied in our `Pipfile`, `pipenv` will use that to get a list of dependencies. +# We include a couple of extra options suitable for deployment. +RUN pipenv install --system --deploy + +# At this point we've set up our virtual computer, but we've not _yet_ run our +# application. And we're not going to! We're just setting up the container +# so that it's ready to do so when we tell it. 
+ +# So we're going to tell Docker here that when we _do_ want to run it, this is +# what it should run: +CMD ["python", "app.py"] diff --git a/Pipfile b/Pipfile new file mode 100644 index 000000000..0757494bb --- /dev/null +++ b/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] + +[requires] +python_version = "3.11" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 000000000..54a707836 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,20 @@ +{ + "_meta": { + "hash": { + "sha256": "ed6d5d614626ae28e274e453164affb26694755170ccab3aa5866f093d51d3e4" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": {}, + "develop": {} +} diff --git a/README.md b/README.md index 5337d93c8..ed40cdb22 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,48 @@ -# chatgpt-retrieval +## Chatbot for makers.tech + +openAI chatgpt3 free tier integration +Chatbot data from data files PDF, text +chatbot data extraction from makers.tech + + +## update Constants +# Replace with your own OpenAI API Key https://platform.openai.com/account/api-keys +# and rename this file to constants.py. +#APIKEY = "sk-" + -Simple script to use ChatGPT on your own files. -Here's the [YouTube Video](https://youtu.be/9AXP7tCI9PI). ## Installation -Install [Langchain](https://github.com/hwchase17/langchain) and other required packages. -``` -pip install langchain openai chromadb tiktoken unstructured -``` -Modify `constants.py.default` to use your own [OpenAI API key](https://platform.openai.com/account/api-keys), and rename it to `constants.py`. - -Place your own data into `data/data.txt`. - -## Example usage -Test reading `data/data.txt` file. -``` -> python chatgpt.py "what is my dog's name" -Your dog's name is Sunny. -``` - -Test reading `data/cat.pdf` file. 
-``` -> python chatgpt.py "what is my cat's name" -Your cat's name is Muffy. -``` +1. pipenv install + +2. pinev shell + +3. pip install pytest + + +3. pip install flask + +4. pip install beautifulsoup4 + + pip install requests beautifulsoup4 + + +5. pip install PyPDF2 + +6. pip install scrapy + +7. pytest + +8. pip install aiofiles + +9. pip install elasticsearch + +10. psql CREATE DATABASE chatbot_db; + +11. pip install Flask-SQLAlchemy psycopg2-binary + +12. pip install pdfplumber + + diff --git a/chatgpt.py b/chatgpt.py index ebf95741c..9c4b979e3 100644 --- a/chatgpt.py +++ b/chatgpt.py @@ -1,52 +1,91 @@ -import os -import sys +import os import openai -from langchain.chains import ConversationalRetrievalChain, RetrievalQA -from langchain.chat_models import ChatOpenAI -from langchain.document_loaders import DirectoryLoader, TextLoader -from langchain.embeddings import OpenAIEmbeddings -from langchain.indexes import VectorstoreIndexCreator -from langchain.indexes.vectorstore import VectorStoreIndexWrapper -from langchain.llms import OpenAI -from langchain.vectorstores import Chroma - -import constants - -os.environ["OPENAI_API_KEY"] = constants.APIKEY - -# Enable to save to disk & reuse the model (for repeated queries on the same data) -PERSIST = False - -query = None -if len(sys.argv) > 1: - query = sys.argv[1] - -if PERSIST and os.path.exists("persist"): - print("Reusing index...\n") - vectorstore = Chroma(persist_directory="persist", embedding_function=OpenAIEmbeddings()) - index = VectorStoreIndexWrapper(vectorstore=vectorstore) -else: - #loader = TextLoader("data/data.txt") # Use this line if you only need data.txt - loader = DirectoryLoader("data/") - if PERSIST: - index = VectorstoreIndexCreator(vectorstore_kwargs={"persist_directory":"persist"}).from_loaders([loader]) - else: - index = VectorstoreIndexCreator().from_loaders([loader]) - -chain = ConversationalRetrievalChain.from_llm( - llm=ChatOpenAI(model="gpt-3.5-turbo"), - 
retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}), -) - -chat_history = [] -while True: - if not query: - query = input("Prompt: ") - if query in ['quit', 'q', 'exit']: - sys.exit() - result = chain({"question": query, "chat_history": chat_history}) - print(result['answer']) - - chat_history.append((query, result['answer'])) - query = None +import requests +from flask import Flask, request, jsonify, render_template +from flask import current_app +import json +from PyPDF2 import PdfReader +from flask_sqlalchemy import SQLAlchemy +import pdfplumber +from bs4 import BeautifulSoup +from lib.config import Config +from lib.db_models import db +from lib.db_models import ExtractedData +from lib.db_models import create_app, db, store_data, query_data +from lib.search import simple_search +from flask import Flask +from lib.config import Config +from lib.db_models import db + + + +from flask import Flask +from lib.db_models import db + + +def create_app(): + app = Flask(__name__) + app.config.from_object(Config) + + # Initialize extensions with the app instance + db.init_app(app) + + return app + + + + + + + +@app.route('/') +def chatbot(): + return render_template('ai_chatbot.html') + +@app.route('/query', methods=['POST']) +def handle_query(): + data_request = request.json + query = data_request.get('query') + + if not query: + return jsonify({"error": "No query provided"}), 400 + + context = simple_search(query) + + try: + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": query}, + {"role": "assistant", "content": context}, + ] + ) + answer = response.choices[0].message['content'] + except Exception as e: + return jsonify({"error": str(e)}), 500 + + return jsonify({"answer": answer}) + + + +@app.route('/test_error') +def test_error(): + raise Exception('Test exception') + + +if __name__ == '__main__': + app.run(debug=False, port=5001) # 
Set to False in production + + + + + + + + + + + + diff --git a/constants.py.default b/constants.py.default deleted file mode 100644 index 0934537a7..000000000 --- a/constants.py.default +++ /dev/null @@ -1,3 +0,0 @@ -# Replace with your own OpenAI API Key https://platform.openai.com/account/api-keys -# and rename this file to constants.py. -APIKEY = "" diff --git a/data/cat.pdf b/data/cat.pdf deleted file mode 100644 index e5148a11b..000000000 Binary files a/data/cat.pdf and /dev/null differ diff --git a/data/data.txt b/data/data.txt index 99c091a83..53d9e28e5 100644 --- a/data/data.txt +++ b/data/data.txt @@ -1 +1,1220 @@ -My dog's name is Sunny. +1 +1 +Welcome +to Makers + +2 +2 +Makers enabled me to +change my life. I wanted + +to do something which I was +passionate about, so I made +the scary decision to change +careers and pursue software +development. I now work +at one of the hottest tech +startups and work with some +of the most brilliant and +talented people Œ building +amazing solutions, learning +at an accelerated pace and +I love I what do. I never +thought I would actually say +that I look forward to going +to work. +fi +fi +COSTAS KITEOU, MAKERS + +3 +3 +1. A Message from our CEO, Evgeny Shadchnev +3. Makers History, Vision & Mission +5. What we believe: a message from Dana +7. Getting set up! +8. Prepare for PreCourse +9. Tips for remote working +12. Finance +11. Responsibilities +13. Curriculum +16. Blogging +17. Meet Makers +Contents + +1 +1 +The time I spent at Makers was not +easy. During the course there was a +constant feeling of being on your back +foot, falling behind or otherwise just +not understanding anything. It was only +after I completed the course did I see +the real merit of what I had been taught. +Makers doesn™t just teach you how to +code, it teaches you how to be a coder. 
+From soft skills to team management, to +understanding acceptance and rejection +during the job hunt, Makers has given +me a well-rounded view of the industry +and one that I hope will continue long +into my career. The holistic approach to +development is a wonderful bonus as well, +and all the coaches and staff are friendly +and helpful. +fi +fi +RUPERT ADAMS + +1 +1 +Major congratulations for joining Makers - +we™re really looking forward to helping you +˜nd your feet as a developer. I wanted to say +a quick hello and explain why we™re excited +to have you on board. +First of all, we pride ourselves on being +much more than just a coding bootcamp. +We know that Makers changes lives. You +can see this from reading the stories of +various students - like this +mother-of-four +Kate Morris +, or Jordan Roberts who +left law +to retrain into tech +, or the countless other +student stories you can +read on our blog. + +We™re looking forward to hearing your story. +Secondly, we know that your relationship +with Makers doesn™t end when the training +does. ‚Once a Maker, Always a Maker ™ is +something we pride ourselves on, and you +can +read more about our alumni community +here. + We™re excited that you™ll get to join this +group once you ˜nish training. +A message +from our +founder, +Evgeny +Shadchnev +Lastly, I wanted to share why +I started Makers. Essentially, +I believed that there was a +better way to link education to +employment. Our goal was (and +still is) to help people become +developers and start exciting new +careers. Yet, our work has bigger +implications in terms of the impact +on the society that our developers +will have. You can +read more about +my thoughts on that here. + + +2 +2 +Essentially, the tech +revolution is affecting + +every single industry + +and every single business, + +and it ™s only getting started. +Hopefully you™ll learn from us, just by +virtue of being on campus, that looking + +after each other matters. 
That everyone + +matters, and everyone should have a + +voice. That pro˜t shouldn™t be the only + +motive in life. Those feelings have a + +place in the professional world. That + +mental health is just as important as + +physical health. +That it ™s okay not to work +with an employer that is + +harming the society and + +the environment. That it ™s + +important to pay it forward, + +just because you can. That + +we need to learn to have + +difficult conversations + +about equality, reverse + +sexism, invisible privilege + +and other difficult topics, + +in order to move forward + +as the tech industry and + +society in general. +You will build the next generation +of technology, guided by values you + +pick up here, at Makers. I hope you + +build good things. +Best of luck, Evgeny + +3 +3 +In 2013, co-founders Evgeny + +and Rob met at Forward +Labs and got to talking about +the problems people have +learning to code. Evgeny +had trained as a software +developer at Imperial College +and as he was trying to build +out his team he discovered a +lack of qualified developers +on the market. Rob had +been through the arduous +process of teaching himself to +programme through a book +on Objective C. + +They both saw that traditional +education was broken. The vision they +had was for Makers to be an alternative +to university and a vocational route +into a job as a software developer. +Makers would train people to + +become software engineers in only +three months Š it would be a platform + +to provide the world™s most e˚cient +introduction to programming +principles. +Back then, Rob wrote what still + +rings true today: +fiMakers is not for everyone. +Some people are okay +investing the time and money +into getting a computer +science degree and that ™s +great. Others are okay with +the inefficiencies in teaching +themselves and just want +to learn enough to get by. 
+For the rest, we™re building +Makers.fl +Makers +History, + +Vision + +& Mission +3 + +4 +4 +We™re + +proud + +to have: +Ł + +Trained over 1700 Makers (on + +average 35% of them were women); + +Ł + +Connected them to over 250 of +London™s top technology companies +(Deliveroo, British Gas, Starling Bank, +Financial Times, Compare The Market. +com, Tesco, among others); + +Ł + +Grown to a team of over 40 people, +with the philosophy of ‚ trust over fear ™; + +Ł + +Been included in 2018™s UpScale +cohort, which aims to support the +growth of the UK™s most exciting and +innovative companies Š we™re excited +to leverage that to widen our impact +on the tech industry. +We™re proud to have proven + +that a complete beginner + +can get an amazing job + +in software development + +in just a few months. +In the coming years we™ll double-down on +making our course even more accessible and +inclusive; helping our Makers learn a wider +range of skills; working with even more hiring +partners across all industries, so that many +more thousands of newly trained software +developers can start careers they truly love. + +5 +5 +Hi, I™m Dana. I™ve worked in the tech +industry for almost a decade. I have been +with Makers since 2014. Here, we believe +that world-class developers are not only +technically skilled but are also strong +communicators and collaborators. This +is why we have an emotional intelligence +curriculum, which I™ve developed. + +I wanted to take a few +moments to welcome you +onto the course and to set our +expectations around mindset. +Makers is a fast-moving environment. +Students almost always feel behind in the +amount that they have to absorb as there™s +so much to learn each day. The course is +designed that way. I encourage students to +try to make peace with doing your best, as +stressing about not being able to complete +everything ultimately slows you down. 
+What we +believe: +A message +from Dana, +our Chief +Joy Officer +5 + +6 +6 +Learning is the point, not +completing everything. It™s +easy to get caught up in the +Makers journey and forget +this. +As long as you™re doing your best and +focusing on learning, not completion, +you™re likely to be progressing in a more +optimal way. There™s a ˜ne line between +pushing yourself and over-doing + +it. With proper attention to self-care, +you can be sensitive to that line and +avoid burnout. +At Makers we value a sense of +autonomy in work and learning. +Students engage with self-led learning +which means they are in control of +their journeys. You™re expected to set +and take charge of your own goals, +which is quite a new concept for +most (especially those coming from +traditional education, where there are +generally clear hoops to jump through). +It can take some time adjusting to that +and can sometimes leave you feeling +out of control. +Certainty is a core human need but is +often a rare commodity at Makers, as +the curriculum is deliberately designed +to keep you on your toes as you have + +a series of coding curve-balls thrown + +at you˛Š˛being aware of this helps +prevent burnout from creeping in. +Remind yourself to trust the process +and reach for what is certain in your + +life: your support network (Makers sta˝ + +/resources, cohort members, family, +friends, hobbies, values etc.). +Drop expectations that you might + +have had about how the course should +go for you and try to be present, while +allowing the process to take you on +the journey. If you value a high level +of control in your life then this is the +perfect opportunity to practice being +˙exible and adaptable as those qualities +rank high in what makes a great +developer. +Welcome to your Makers +journey. I™m looking forward +to supporting you. + +7 +7 +Getting +Set up! 
+7 + +8 +8 +S +et +t +ing + +u +p + a + +c +od +i +ng +e +n +vir +onment +If you are using a Mac, + a + + +of the tools +you will need to create a dev + +environment on your computer. +Henry and Emma, + two of the Makers +assistant coaches, have written about + +setting up a Cloud9 AWS developer +environment that +you can use on any operating systems +- +if you are a Windows user, we +recommend using this environment. +Passing the PreCourse +Nikesh™s post + on passing the PreCourse +is a helpful summary on what the goal + +of the PreCourse is, and how an + +e˝ectively ful˜lled PreCourse can set + +you up for success later on with our + +hiring partners. +- You are expected to work independently +as much as possible +- You should be working on improving +your problem solving processes +- PreCourse is an opportunity to get +to know your cohort and learn how +to learn from others +We hope that you found these +links useful. +If you have any questions about Makers, +don™t hesitate to +contact us + - and please, + +do come around for a Q&A session, or + +a Demo Day, even before your course + +starts, we would love to have you. +Prepare for +PreCourse +8 +On the first day of the PreCourse you will receive +a link to Teachable which will contain all the + +PreCourse material. You will also have an invitation + +to join Slack - our chat tool. This is what you will be + +using for all communication. + +9 +9 +Tips for +remote +working +From Diana Constantinou +1. Organisation + +Structure to your day is important. +Routines are important for +normalising this new way of working. +Planning out your day with consistent +tasks will get you into a ˙ow with +working. This includes the planning + +of breaks, lunch and physical activity. +Create a good routine with +short exercise breaks. +Moving and stretching our bodies +energises the brain. The truth is, +endorphins are produced when +exercising, which increases happiness +and interest levels. +2. 
Create a workspace + +Obviously an o˚ce is ideal, but +not always possible. Try to create +an o˚ce space within the space +you have available This will set out +clearly when you are working (in the +space); and when you are relaxing. +Prepare a conducive workspace. Apart +from having a high-speed internet +connection and a reliable router, get +a comfortable chair and think about a +second monitor or in the least a clear +display. You are going to be staring at +the screen all day, o˝ and on. +If you cohabit try closing the +door when you are trying to +focus. +Clear communication is important. +Creating rules around video calls, or +focus time reduces mishaps due to +poor communication. For example, +when the door is closed or when my +headphones are on–.. +It is a good habit to mute your +microphone during conference calls +(unless you™re speaking) to minimise +the amount of audio feedback and +random sounds that interrupt the +conversation. + +10 +10 +3. Research/ stay up to date + +The good news is, you are not alone. +The world is transitioning from physically +working to remote. There are a load of +really useful blogs and websites. Try to +follow some people that are similar to +you and your context, follow them to +pick up tips. Again, just a list to get you +started and in no way complete: +The Five Levels of Remote Working, +a blog aimed to maximise your +effectiveness as a remote worker +The Stress of Remote Working, a blog +by a software developer who has +worked from home for some time now +How to thrive working from home, a +blog on some tips for effective home +working +What is it like to work as a remote +software developer, a blog by current +software developers working remotely +NHS blog - every mind matters, some +medical information on working from +home +Just Giving blog on working from +home +BBC news article with advice for +working from home during COVID-19 +4. Dress properly + +Resist the temptation to stay in your +pyjamas 24/7. 
Remember how you +dress will also a˝ect your mental +perception. It ™s important to follow +routines you would have normally put +in place. Having a breakfast routine, +getting ready for work, and o˚cially +getting started. +5. Concentration + +Cut o˝ slack: that said, know when +to shut o˝ Slack, or any other way +to communicate (like email) and just +focus on the work. So many hours of +productivity can be lost just by reading +other people conversations. +Consider keeping the television turned +o˝. It can be a big distraction. The +radio or music on YouTube might be +better if you are the type who works +well with some background music. +Relaxing music creates an unwinding +mood, whereas up tempo soundtracks +keep the energy level high. But you +just need to be aware of how noise is +a˝ecting your productivity. +Stay o˝ social media. We are all guilty +of this one from time to time, and social +media can be one giant time-waster if +you™re not careful. + +11 +11 +6. For Parents + +Keep children in your plans. Due +to school closure, children may +also be around at home. Plan and +schedule some activities to occupy +them while you work. +Have staggered lunchtime +to spend with children; for +instance, one partner can +have a lunch break at 11 +am-12 pm, and the other +breaks at 12 pm-1 pm, so +2 hours are spent with +children. +Think about taking Saturday as +a school day, and giving them +one work day o˝, better chosen +for your busiest work day. It is +important to keep at least one +common weekend day together. +For all we know, working with +children around us might become +the new norm. + +12 +12 +Finance +12 + +13 +13 +How to pay your remaining + +fees: +You can pay your ˜nal fees via + +this link. +Note: +Your fees need to be initiated +, + +they +will not be taken automatically + +from your account. +When are the final fees due? +On the +first day of your PreCourse, + + +or before if you prefer. +What will happen if my +fees are not paid? 
+You will +not be allowed to join the +full-time course + until your ˜nal fees are +paid. +Are there any other +payment alternatives? +If you have had problems using +GoCardless you can also pay via bank +transfer, please follow the steps below: +> Add your ˜rst name initial and surname +and full-time start date as your reference +E.g. ASmith09 (September full-time) +> Account name: Makers Academy +> Sort code: 20-44-91 +> Account number: 20254797 +How can I pay my fees + +from abroad? +There are two options when trying + +to pay from abroad: +Transferwise: + ˜nd out more + +here + +Can I pay using a credit card? +No, unfortunately we cannot accept +payments via credit cards. +If you have any problems with regards to paying your final fees, +please contact admissions@makers.tech and we™ll be happy to +help. + +14 +14 +Education at Makers is +designed to be empowering. +Being a software engineer +means looking at any problem +you encounter and believing +fiWith thoughtful effort, I can +solve this.fl + +We trust you with three responsibilities: +Ł + +Deciding what to aim for + +(your goals). +Ł + +Determining how to get there + +(your plan). +Ł + +Getting insight into your progress +(self-assessment). +We believe that you are the best + +person to lead these things. This + +way, when someone asks you how +it ™s going, you should know pretty +accurately how you™re doing as + +a software engineer, where you + +want to get to, and how you™re + +going to get there. +You will know when to put in more +e˝ort, when to slow down, change +focus, employ your strengths + +or strengthen your weaknesses. + +You will know when you are ready + +to apply for jobs. You™re in the + +driving seat. +What are your +responsibilities? +14 + +15 +15 +What are + +the coaches™ + +responsibilities? +So what™s our job? +Ł + +To support you with the above. +Ł + +To provide an environment +with the tools and motivation +necessary to achieve your goals. 
+It ™s our job to give you the +prompts and starting-points +to train the habits of planning +and self-assessment required +to be a successful self-led +learner. + +And then there™s the matter of making the +environment here as rich in opportunities +to learn as possible. + + +If you™re not sure whether your code +is readable or not, it should be easy to +learn how to determine that, execute it, +and then engage productively with the +task of improving your code. Moreover, +the atmosphere and people around you +should inspire you to be better +Not everyone will come here +knowing how to take control +of their own journey Š and +that™s fine. + +16 +16 +Curriculum +16 + +17 +17 +We have a carefully thought- +out curriculum which +advances you in software +development week by week. +Students learn in peer-to- +peer environments facilitated +by experienced facilitators. +Ł + +Students pair programme for the +majority of the day. +Ł + +Daily workshops & code reviews are +led by coaches. +Ł + +Access to the Makers Students Slack +community +Our course is designed to give you the +skills you need for working in a real +development team. From day one you™ll +be participating in stand-ups, retros and +pair programming. You™ll also learn about +TDD and agile, and through the course +you™ll manage your own projects using +di˝erent project management tools that +align with these principles. +Much of the course will be spent with +programming in Ruby, but the Makers +students ˜nish the course + language- +agnostic +: meaning that they will be able +to programme in any language. +Our course is constantly being developed +and re˜ned over time, but hopefully this +will give you an idea of what kinds of +things you™ll be working on. 
+FIND OUT MORE +A map of your +Makers Journey + +18 +18 +˜˚˛˝˙˚ˆˇ˚˘˘ + +˚ + + + ˆˆ˝˛˙ +˚ˆ˚ + +˚† + +˚“ + +˚‘ + +˚ + +˚† +˜˚˛˝˙ˆˇ˘ˆ + ˆ +˘˛˙ ˆ + ˆ +˘ˆ˚ ˆ +˚ ˘˘ + + +˝ˇˆ ˆ + ˘˝ˆ ˆ +˙˘ ˆ +˚˝ ˆ +˚ +˚ ˆ +˚˝ ˆ +˚ + ˆ +˚˚˙ ˆ +˚ +˘ ˚ +˚˛ˆ˚ +˝ˇ˝€˝ +˚ˆ˛ ˚ˆ ˘ˆ˘˘ ˆ +˛ˆ +˘˙ˆˇ˘ +˛ ˆ†˛ˆ˚˚ˆ˘˝ˆ +˘˝˚˚˚ˆ +˘ + +˘˘˝˚š + ˚ +—˚˘˘˚€ˆ•˚ +– +ƒ˘˚˝ˇ ˚ +€˘˝ ˆ +˘˙ˆ˝ˇ ˆ +˘‡ + +ˆ +˚ + ˆ‘˚˚ +ˆ‘˝˛˚ˆ‘ ˆ + ˘‡˝˘ˆ˝˚˚˝ ˚ˆ˛˝˘˙ ˆ +…—ˆˇ˘˝ˆ˛ˆ˘ˆ˚’˛˘ ˆ +˘ˆ’ ˆ +˚ ˚ +˘ ˚˝ˆ•˘ˆ˛˘ ˛˙ ˆ ˘ ˆ +’˛˝ +⁄ˆ˚€ +–˘˘ˆ˛˙ ˆ +ˇ˘˙ˆ‘˘˝ˆ˛˚˚ ˆ +˚˝ˆ˛˚˝ˆ†˚ ˆ +˝˚’˘˚˙ˆ⁄“š + +ˆ˚…ˆ˛ ˚ +˝ +‹˘˝ ˆ +’˘ ˆ +˘ˆ’˛˙ ˆ +’˘˙’˛˘˝ˆ’ ˆ +˘ˇˆ˘˝ˆ˘˙ +˙˚ˆ’ˇ˛˙˙˚ + ˆ +˛˙ˆ’˘ˆ˛˚˙ˆ˝˚ +˛˝ˆ +˛˚˝ˆˇ˝˙ˆ˚ + •ˆ ˚ +˝€˚˝˚ˇ ˚ + +›˝˚ ˛˝˘ˆ +˛ ˆ + +˘˙˛˚ˆ˛˙ˆ‘˘’ˆ +˘˙ˆ˚’˚˝ ˆ +˚ ˘ˇˆ ˘˝ˆ + ˆ +’˘ ˛˙˚š ˆ +˜˚˛˝˙ˆˇ˘ˆ +˚ ˆ +˝ ˚ˆ˛ ˆ +˛ + +ˆ ˆ +˛ +’˛ +˚˚ ˆ +˛ˆ’˘ +˚⁄ˆ +˛ +’˛ ˆ + + +˚‹ +–˘ + +˘ˆˇˇ +˘’˚˚ ˆ +˘ˆ +ˆ˛ˆ˚ˆ˛ ˆ +˚˛ƒˆ˙ ˆ + ˆ˘˙ˆ˛ + + +˚› +–˘ + +˘ ˆ + ˆ + ˝˘’˚˚ˆ˙ˆ˛ ˆ +˘ˆ + ˆ +˛ˆ˛ +ˆ˛ + +˚ † +–˙˛ +ˆ›˝ +’˛ˆ˚’˝˘ ˚ +˘˚‰ + +˚ „ +˚ˆˇˇˆ +˘˚ ˆ +ˆ +˘ˆ ˛ˆ˛ˆ˘’ ˆ +˚ + +˚ +–˙˛ +ˆ›˝ + +˚” +˘˝ˆ˙ˆ˛ˆ +˘ˆ + ˆ +˛ˆ’ +˘˙˚ˆ˘‘ˆ˛ˆ +˚ + +˙˘˙ ˆ +˚ˆ˛ + +˚‚ + + +ˆ˛ˆ˙ +˚ ˆ + ˛˚ˆ˚ˆ +˛ ˆ˙ˆ +‰˛ ˆ +‰˚˝ + +˚‘ + + +ˆ˛ˆ ˆ +ˆ +˘˝˚ˆ˛ ˆ +˛ˆ˛ + +˚™ + + +ˆ˛ ˆ +˙ +˚ˆ ˛˚ ˆ +˛ ˆ˙ ˆ +‰˛ ˆ +˙˘ˆ‰˚˝ ” + +˚“ + + +ˆ˛ ˆ +˚ˆ˛ +˙ ˆ + + +ˆ˛˙š ˚ +˚ +˝ˇ˚ +fi +˚ +˝˛Œ + ˇ˚ ˚ + + +šˆ˛˚ + ˚ +ˆ ˚ + ˚ +ƒ˚ +˜ +˚ +˜˚˛˝˙ˆ˝ˇˇ˚˘ˆ˙˙ +˚ˇ˙˜˚˘˛˝ + ˘ ˙ +˙ +˚˘ˇ˝ ˚˝˙ +˘ +˙˚˘˚˚˘ˆ˙ +˛ +ˆ˚ˆ€ +˚ +˚ +ˇ˝˙ +˛˝˙˙ +˝˙ +˘ˇ˙ˇ ˙ +˝˝˙ +ˇ˙˝˙“˝˝˘˙˝ +˘˚˘ˆ ˙ +˚˘˝˙ +˛ ˙ +˚ ˙ +˚˙ˇ˝˙˘˙ +ˇ˙˝˝˙˝˙ +˚˘ˇ ˙ +˚ˇ˙˙ +ƒˆ•šš˚˝‰˛ˆ˚ + ˚ + +˚ + +˙ ˙ +˝˝˙ ˝ ˙ + +˚Œ +˚ +•˝˙ +ˆ˝˙ ˇ˚˘ˆ˙˙ + +˘†ˇ˙ +˘ˆ˝˙ +˘ +˙˝† +˙˚˛˝˙˝˝˙˙ + ˝˘˙˝˘˙˙ + +˚ +‡˝ˆ +˙˚˚˘ˆ˙˝ ˝˘ˇ˙ +˝˘ ˙ +ˇ˙ + +˚† + +˚“ + +˚‘ +€˘‡ + +ˆˇ˚˛˝ ˆ +˛’ˆ‘˝ ˆ +˛˝˚˚˝ˆ˚˛ ˆ +˛˙ˆ + +ˆ˘ˆ +˚ˆ˙˚ˆ‘˘˝ ˆ +˚˝ ˚ˆ˛˙ ˆ +˘ˆ’˚ˆ +˚ +€˘‡ + +ˆ˚ˆ˚˚ ˆ +˚˝ ˚ˆ +˚ ˆ +˚˚ˆˇ +˚ˆ ˘ˆ +’˘ˆ’˘˚ ˆ + ˆ +’˘ ˛˙˚ˆ ˘‡˝˚ˆ +˚˝˚˚ˆ˙š +˚ ˆ +˘˝ˆ˚˚ˆ’ ’ +˚ ˆ +˚˝ ˚ˆ˛ ˆ ˆ +‡ ˆ +˘ˆ˛ˆ ˚˝˘˙˛ + ˆ + ˝˘˝ ˆ +˘’˚ˆ ˘˝˚ +‘ ˆ +˚’ˇˆ ˝˘‘˚˘˙˛ +ˆˇ˘ ˆ +˛ ˆ˘ˆˇ˝˚ˆ ˘š ˆ +ƒˆ•šš˚ˆ ˚ +˚ +˝ ˚ +˛˙˚˚ +˘ ˚ +Œ +18 + +19 +19 +At Makers we advise you to blog during your time +with us. 
Not only does it allow you to re˙ect on your +learning, it also makes a signi˜cant di˝erence in the +job-hunting process. +Find out more here +Makers + +Blogging + +Opportunity! +19 + +20 +20 +Meet the +Makers +community +20 + +21 +21 +GET TO KNOW + +The team +who helps +you find + +a job +READ +More +student +stories +GET TO KNOW + +Your +coaches +here + +22 +22 +Our community is what makes Makers. +From mentoring opportunities, to +social events to job-hunting, the alumni +community is there to support you +from day one at Makers, during the +course and throughout your journey + +in the world of software development. +Once + +a Maker, +a l ways +a Maker +22 + +23 +23 +Code + +of +Conduct +Our open-source Community +strives to be: +Friendly and patient +Welcoming +Considerate +Respectful +Careful in the words that we choose +Careful to understand why we disagree +This Code of Conduct +outlines our expectations +for participants within +the Makers community, as +well as steps for reporting +unacceptable behaviour. +We are committed to providing a +welcoming and inspiring community for +all, and expect our code of conduct to +be honoured. Anyone who violates this +Code of Conduct may be banned from +the community. Please take the time to +read the full Code of Conduct + +here. +23 + +24 +24 +We™re excited +to welcome +you very soon! 
import os
import json


class Config:
    """Configuration constants for the Flask app (lib/config.py).

    OPENAI_API_KEY is read from the environment so the key never lands in
    source control.
    """
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    SQLALCHEMY_DATABASE_URI = 'postgresql:///chatbot_db'
    SQLALCHEMY_TRACK_MODIFICATIONS = False


# --- lib/data_handler.py: extract text from PDF / plain-text data files ---

def read_file(file_path):
    """Dispatch on file extension and return the file's extracted text.

    PDFs go through PyPDF2 with a pdfplumber fallback; .txt/.md/.html are
    read as UTF-8 text. Any other extension returns a sentinel string.
    """
    _, extension = os.path.splitext(file_path)
    extension = extension.lower()
    if extension == '.pdf':
        return read_pdf_combined(file_path)
    if extension in ('.txt', '.md', '.html'):
        return read_text_file_sync(file_path)
    return "Unsupported file format"


def read_pdf(file_path):
    """Extract text from every page of a PDF via PyPDF2.

    Fix: the original called ``PyPDF2.PdfFileReader`` although only the class
    (not the module) was imported — a NameError at call time — and the
    PdfFileReader/getPage/extractText API was removed in PyPDF2 3.x.
    """
    # Local import keeps this module importable when PyPDF2 is absent.
    from PyPDF2 import PdfReader
    text = ''
    with open(file_path, 'rb') as file:
        for page in PdfReader(file).pages:
            # extract_text() can return None for image-only pages.
            text += page.extract_text() or ''
    return text


def read_text_file(file_path):
    """Read a text file as UTF-8 and return its full contents."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


def read_pdf_combined(file_path):
    """Extract PDF text with PyPDF2, falling back to pdfplumber.

    As soon as a page yields no text (scanned/image page), the whole
    document is re-read with pdfplumber, which copes better with such PDFs.
    """
    from PyPDF2 import PdfReader
    text = ''
    with open(file_path, 'rb') as file:
        for page in PdfReader(file).pages:
            page_text = page.extract_text()
            if not page_text:
                return read_pdf_with_pdfplumber(file_path)
            text += page_text
    return text


def read_pdf_with_pdfplumber(file_path):
    """Extract text from a PDF using pdfplumber (slower, more robust)."""
    import pdfplumber
    text = ''
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text += page.extract_text() or ''
    return text


def read_text_file_sync(file_path):
    """Read a text file and return its contents (UTF-8 pinned; was platform default)."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


# --- lib/data_loader.py: merge crawler output with local data files ---

def load_scrapy_data(scrapy_output_file):
    """Load Scrapy's JSON output if the file exists, else an empty dict."""
    if os.path.exists(scrapy_output_file):
        with open(scrapy_output_file, 'r') as file:
            return json.load(file)
    return {}


def load_combined_data(data_folder, scrapy_output_file):
    """Combine crawler output with text extracted from .pdf/.txt files in data_folder."""
    combined_data = load_scrapy_data(scrapy_output_file)
    for file_name in os.listdir(data_folder):
        file_path = os.path.join(data_folder, file_name)
        if file_name.endswith('.pdf'):
            combined_data[file_name] = read_pdf(file_path)
        elif file_name.endswith('.txt'):
            combined_data[file_name] = read_text_file(file_path)
    return combined_data


if __name__ == "__main__":
    # Fix: this previously ran unconditionally at import time with
    # machine-specific absolute paths, crashing on any other machine.
    # Paths are now relative to the repository root.
    data_folder = 'data'
    scrapy_output_file = 'myproject/myproject/spiders/output.json'
    data = load_combined_data(data_folder, scrapy_output_file)
# --- lib/database_connection.py ---
import os, psycopg
from flask import g, render_template, Flask
from psycopg.rows import dict_row

app = Flask(__name__)


class DatabaseConnection:
    """Thin wrapper around psycopg for connecting to, seeding and querying
    the application's PostgreSQL database (dev or test)."""

    DEV_DATABASE_NAME = "chatbot_db"
    TEST_DATABASE_NAME = "chatbot_db_test"

    def __init__(self, test_mode=False):
        self.test_mode = test_mode
        # Fix: without this, _check_connection() raised AttributeError
        # instead of the intended CONNECTION_MESSAGE when connect() was
        # never called.
        self.connection = None

    def connect(self):
        """Connect to PostgreSQL on localhost, selecting the dev or test DB."""
        try:
            self.connection = psycopg.connect(
                f"postgresql://localhost/{self._database_name()}",
                row_factory=dict_row)
        except psycopg.OperationalError:
            raise Exception(
                f"Couldn't connect to the database {self._database_name()}! "
                f"Did you create it using `createdb {self._database_name()}`?")

    def seed(self, sql_filename):
        """Run the given SQL file against the database (resets state for tests/app)."""
        self._check_connection()
        if not os.path.exists(sql_filename):
            raise Exception(f"File {sql_filename} does not exist")
        # Fix: read via a context manager; the original leaked the file handle.
        with open(sql_filename, "r") as sql_file:
            sql = sql_file.read()
        with self.connection.cursor() as cursor:
            cursor.execute(sql)
        self.connection.commit()

    def execute(self, query, params=None):
        """Execute a (parameterised) SQL query; return rows, or None for non-SELECTs."""
        # Fix: mutable default argument [] replaced with a None sentinel.
        self._check_connection()
        with self.connection.cursor() as cursor:
            cursor.execute(query, params if params is not None else [])
            if cursor.description is not None:
                result = cursor.fetchall()
            else:
                result = None
        self.connection.commit()
        return result

    # Fix: removed the stray backtick and word-order glitch of the original message.
    CONNECTION_MESSAGE = '' \
        'DatabaseConnection.exec_params: Cannot run a SQL query as ' \
        'the connection to the database was never opened. Did you ' \
        'make sure to first call the method DatabaseConnection.connect ' \
        'in your app.py file (or in your tests)?'

    def _check_connection(self):
        # Guard every query with a helpful error if connect() was skipped.
        if self.connection is None:
            raise Exception(self.CONNECTION_MESSAGE)

    def _database_name(self):
        # Test runs use a separate database so they cannot corrupt dev data.
        if self.test_mode:
            return self.TEST_DATABASE_NAME
        return self.DEV_DATABASE_NAME


def get_flask_database_connection():
    """Create (once per request context) and return the shared DB connection on flask.g."""
    if not hasattr(g, 'flask_database_connection'):
        g.flask_database_connection = DatabaseConnection(
            test_mode=os.getenv('APP_ENV') == 'test')
        g.flask_database_connection.connect()
    return g.flask_database_connection


# --- lib/db_models.py ---
from flask_sqlalchemy import SQLAlchemy
from chatgpt import app as chatgpt_app

chatgpt_app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql:///chatbot_db'
chatgpt_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# Fix: the original created one unused SQLAlchemy() instance and then a
# second bound one; keep a single instance bound to the app.
db = SQLAlchemy(chatgpt_app)


class ExtractedData(db.Model):
    # id: surrogate key; source: 'web' or 'file'; content: raw extracted text.
    id = db.Column(db.Integer, primary_key=True)
    source = db.Column(db.String, nullable=False)
    content = db.Column(db.Text, nullable=False)

    def __repr__(self):
        # Fix: the original returned the empty string f''.
        return f'<ExtractedData {self.id} {self.source}>'


def create_tables():
    """Create all tables. Fix: @app.before_first_request was removed in
    Flask 2.3 — call this explicitly during application start-up instead."""
    with chatgpt_app.app_context():
        db.create_all()


def store_data(source, content):
    """Persist one extracted record."""
    new_data = ExtractedData(source=source, content=content)
    db.session.add(new_data)
    db.session.commit()


def query_data(keyword):
    """Return all records whose content contains the keyword (SQL LIKE, parameterised)."""
    search = f"%{keyword}%"
    return ExtractedData.query.filter(ExtractedData.content.like(search)).all()
# --- lib/posts.py ---
class Post:
    """Immutable-ish value object representing a single chat post."""

    def __init__(self, id, message, user_id, timestamp):
        # All four constructor arguments are stored under matching names.
        self.id, self.message, self.user_id, self.timestamp = (
            id, message, user_id, timestamp)

    def __eq__(self, other):
        # Attribute-wise equality: every stored field must match.
        return self.__dict__ == other.__dict__

    def __repr__(self):
        return (f"Post(id={self.id}, message='{self.message}', "
                f"user_id={self.user_id}, timestamp='{self.timestamp}')")


# --- lib/search.py ---
def simple_search(query, data):
    """Find the document with the most (case-insensitive) hits for `query`
    and return a snippet of ±30 characters around its first hit.

    Returns a fallback message when no document contains the query at all.
    """
    needle = query.lower()

    # Pick the document with the strictly highest occurrence count
    # (first one wins on ties), ignoring documents with zero hits.
    best_text = None
    best_count = 0
    for text in data.values():
        occurrences = text.lower().count(needle)
        if occurrences > best_count:
            best_count = occurrences
            best_text = text

    if best_text is None:
        return "No relevant information found."

    # Window of up to 30 characters either side of the first match.
    at = best_text.lower().find(needle)
    stop = at + len(needle)
    return best_text[max(0, at - 30):min(stop + 30, len(best_text))]
# --- lib/users.py ---
class User:
    """Value object describing an application user account."""

    def __init__(self, id, name, username, password, email):
        # Store every constructor argument under the same attribute name,
        # preserving declaration order (which __eq__ relies on via __dict__).
        for attr, value in (("id", id), ("name", name),
                            ("username", username), ("password", password),
                            ("email", email)):
            setattr(self, attr, value)

    def __eq__(self, other):
        # Two users are equal when all stored attributes match.
        return self.__dict__ == other.__dict__

    def __repr__(self):
        # The password is deliberately left out of the debug representation.
        return (f"User(id={self.id}, name='{self.name}', "
                f"username='{self.username}', email='{self.email}')")
+ s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_spider_input(self, response, spider): + # Called for each response that goes through the spider + # middleware and into the spider. + + # Should return None or raise an exception. + return None + + def process_spider_output(self, response, result, spider): + # Called with the results returned from the Spider, after + # it has processed the response. + + # Must return an iterable of Request, or item objects. + for i in result: + yield i + + def process_spider_exception(self, response, exception, spider): + # Called when a spider or process_spider_input() method + # (from other spider middleware) raises an exception. + + # Should return either None or an iterable of Request or item objects. + pass + + def process_start_requests(self, start_requests, spider): + # Called with the start requests of the spider, and works + # similarly to the process_spider_output() method, except + # that it doesn’t have a response associated. + + # Must return only requests (not items). + for r in start_requests: + yield r + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) + + +class MyprojectDownloaderMiddleware: + # Not all methods need to be defined. If a method is not defined, + # scrapy acts as if the downloader middleware does not modify the + # passed objects. + + @classmethod + def from_crawler(cls, crawler): + # This method is used by Scrapy to create your spiders. + s = cls() + crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) + return s + + def process_request(self, request, spider): + # Called for each request that goes through the downloader + # middleware. 
+ + # Must either: + # - return None: continue processing this request + # - or return a Response object + # - or return a Request object + # - or raise IgnoreRequest: process_exception() methods of + # installed downloader middleware will be called + return None + + def process_response(self, request, response, spider): + # Called with the response returned from the downloader. + + # Must either; + # - return a Response object + # - return a Request object + # - or raise IgnoreRequest + return response + + def process_exception(self, request, exception, spider): + # Called when a download handler or a process_request() + # (from other downloader middleware) raises an exception. + + # Must either: + # - return None: continue processing this exception + # - return a Response object: stops process_exception() chain + # - return a Request object: stops process_exception() chain + pass + + def spider_opened(self, spider): + spider.logger.info("Spider opened: %s" % spider.name) diff --git a/myproject/myproject/pipelines.py b/myproject/myproject/pipelines.py new file mode 100644 index 000000000..1569f768d --- /dev/null +++ b/myproject/myproject/pipelines.py @@ -0,0 +1,13 @@ +# Define your item pipelines here +# +# Don't forget to add your pipeline to the ITEM_PIPELINES setting +# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html + + +# useful for handling different item types with a single interface +from itemadapter import ItemAdapter + + +class MyprojectPipeline: + def process_item(self, item, spider): + return item diff --git a/myproject/myproject/settings.py b/myproject/myproject/settings.py new file mode 100644 index 000000000..8aeeaea1c --- /dev/null +++ b/myproject/myproject/settings.py @@ -0,0 +1,109 @@ +# Scrapy settings for myproject project +# +# For simplicity, this file contains only settings considered important or +# commonly used. 
You can find more settings consulting the documentation: +# +# https://docs.scrapy.org/en/latest/topics/settings.html +# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +# https://docs.scrapy.org/en/latest/topics/spider-middleware.html + +BOT_NAME = "myproject" + +SPIDER_MODULES = ["myproject.spiders"] +NEWSPIDER_MODULE = "myproject.spiders" + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = "myproject (+http://www.yourdomain.com)" + +# Obey robots.txt rules +ROBOTSTXT_OBEY = True + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +#CONCURRENT_REQUESTS = 32 + +# Configure a delay for requests for the same website (default: 0) +# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY = 3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN = 16 +#CONCURRENT_REQUESTS_PER_IP = 16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED = False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED = False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", +# "Accept-Language": "en", +#} + +# Enable or disable spider middlewares +# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# "myproject.middlewares.MyprojectSpiderMiddleware": 543, +#} + +# Enable or disable downloader middlewares +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html +#DOWNLOADER_MIDDLEWARES = { +# "myproject.middlewares.MyprojectDownloaderMiddleware": 543, +#} + +# Enable or disable extensions +# See https://docs.scrapy.org/en/latest/topics/extensions.html +#EXTENSIONS = { +# "scrapy.extensions.telnet.TelnetConsole": None, +#} + +# Configure item pipelines +# See 
https://docs.scrapy.org/en/latest/topics/item-pipeline.html +#ITEM_PIPELINES = { +# "myproject.pipelines.MyprojectPipeline": 300, +#} + +# Enable and configure the AutoThrottle extension (disabled by default) +# See https://docs.scrapy.org/en/latest/topics/autothrottle.html +#AUTOTHROTTLE_ENABLED = True +# The initial download delay +#AUTOTHROTTLE_START_DELAY = 5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY = 60 +# The average number of requests Scrapy should be sending in parallel to +# each remote server +#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG = False + +# Enable and configure HTTP caching (disabled by default) +# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED = True +#HTTPCACHE_EXPIRATION_SECS = 0 +#HTTPCACHE_DIR = "httpcache" +#HTTPCACHE_IGNORE_HTTP_CODES = [] +#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage" + +# Set settings whose default value is deprecated to a future-proof value +REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7" +TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" +FEED_EXPORT_ENCODING = "utf-8" + + + +# Enable and configure AutoThrottle +AUTOTHROTTLE_ENABLED = True +AUTOTHROTTLE_START_DELAY = 5 +AUTOTHROTTLE_MAX_DELAY = 60 +AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 +AUTOTHROTTLE_DEBUG = False + +# Configure maximum concurrent requests +CONCURRENT_REQUESTS_PER_DOMAIN = 8 +CONCURRENT_REQUESTS_PER_IP = 8 + +# Configure a delay for requests +DOWNLOAD_DELAY = 3 diff --git a/myproject/myproject/spiders/__init__.py b/myproject/myproject/spiders/__init__.py new file mode 100644 index 000000000..ebd689ac5 --- /dev/null +++ b/myproject/myproject/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to 
create and manage +# your spiders. diff --git a/myproject/myproject/spiders/makers_spider.py b/myproject/myproject/spiders/makers_spider.py new file mode 100644 index 000000000..7eb6d22ea --- /dev/null +++ b/myproject/myproject/spiders/makers_spider.py @@ -0,0 +1,19 @@ +import scrapy + +class MakersSpider(scrapy.Spider): + name = 'makers' + allowed_domains = ['makers.tech'] + start_urls = ['https://makers.tech/'] + + def parse(self, response): + # Example: Extracting article titles and URLs + for article in response.css('div.article'): + yield { + 'title': article.css('h2::text').get(), + 'url': article.css('a::attr(href)').get() + } + + # Follow links to next pages (if any) + next_page = response.css('a.next::attr(href)').get() + if next_page is not None: + yield response.follow(next_page, self.parse) diff --git a/myproject/myproject/spiders/output.json b/myproject/myproject/spiders/output.json new file mode 100644 index 000000000..c44dc44f3 --- /dev/null +++ b/myproject/myproject/spiders/output.json @@ -0,0 +1,3 @@ +[ + +] \ No newline at end of file diff --git a/myproject/scrapy.cfg b/myproject/scrapy.cfg new file mode 100644 index 000000000..86af274d9 --- /dev/null +++ b/myproject/scrapy.cfg @@ -0,0 +1,11 @@ +# Automatically created by: scrapy startproject +# +# For more information about the [deploy] section see: +# https://scrapyd.readthedocs.io/en/latest/deploy.html + +[settings] +default = myproject.settings + +[deploy] +#url = http://localhost:6800/ +project = myproject diff --git a/seed_dev_database.py b/seed_dev_database.py new file mode 100644 index 000000000..cedbd7306 --- /dev/null +++ b/seed_dev_database.py @@ -0,0 +1,11 @@ +from lib.database_connection import DatabaseConnection + +# Run this file to reset your database using the seeds +# ; pipenv run python seed_dev_database.py + +connection = DatabaseConnection(test_mode=False) +connection.connect() +connection.seed("seeds/chat_messages.sql") +connection.seed("seeds/users.sql") +# Add your own seed 
lines below... +# E.g.connection.seed("seeds/your_seed.sql") diff --git a/seeds/chat_messages.sql b/seeds/chat_messages.sql new file mode 100644 index 000000000..64eb41f42 --- /dev/null +++ b/seeds/chat_messages.sql @@ -0,0 +1,20 @@ + +DROP TABLE IF EXISTS chat_messages; + + +CREATE TABLE IF NOT EXISTS chat_messages ( + message_id SERIAL PRIMARY KEY, + user_id INT REFERENCES users(user_id), + chatbot_response BOOLEAN DEFAULT FALSE, + message_text TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + FOREIGN KEY (user_id) REFERENCES users(id) +); + + + +INSERT INTO chat_messages (user_id, chatbot_response, message_text) VALUES (1, FALSE, 'Hello, chatbot!'); +INSERT INTO chat_messages (user_id, chatbot_response, message_text) VALUES (NULL, TRUE, 'Hello, I am the chatbot.'); + + + diff --git a/seeds/database_connection.sql b/seeds/database_connection.sql new file mode 100644 index 000000000..139629c8d --- /dev/null +++ b/seeds/database_connection.sql @@ -0,0 +1,13 @@ +-- The job of this file is to reset all of our important database tables. +-- And add any data that is needed for the tests to run. +-- This is so that our tests, and application, are always operating from a fresh +-- database state, and that tests don't interfere with each other. 
+ +-- First, we must delete (drop) all our tables +DROP TABLE IF EXISTS test_table; + +-- Then, we recreate them +CREATE TABLE test_table (id SERIAL PRIMARY KEY, name VARCHAR(255)); + +-- Finally, we add any records that are needed for the tests to run +INSERT INTO test_table (name) VALUES ('first_record'); diff --git a/seeds/users.sql b/seeds/users.sql new file mode 100644 index 000000000..21d90b115 --- /dev/null +++ b/seeds/users.sql @@ -0,0 +1,13 @@ + +DROP TABLE IF EXISTS users CASCADE; + + +CREATE TABLE IF NOT EXISTS users ( + user_id SERIAL PRIMARY KEY, + username VARCHAR(255) NOT NULL, + email VARCHAR(255), + created_at TIMESTAMPTZ DEFAULT NOW() +); + + +INSERT INTO users (username, email) VALUES ('john_doe', 'john@example.com'); \ No newline at end of file diff --git a/templates/ai_chatbot.html b/templates/ai_chatbot.html new file mode 100644 index 000000000..f01c04aea --- /dev/null +++ b/templates/ai_chatbot.html @@ -0,0 +1,131 @@ + + + + + + + Maker's Assistant + + + +
+
+

Maker's Assistant

+
+
+ +
+
+ + +
+
+ + + + + + + + + + + + + diff --git a/test_chatgpt.py b/test_chatgpt.py new file mode 100644 index 000000000..0383373ee --- /dev/null +++ b/test_chatgpt.py @@ -0,0 +1,79 @@ + + + +import json +import pytest +from chatgpt import app + +@pytest.fixture +def client(): + app = create_app() # Adjust this to how your Flask app is initialized + app.config['TESTING'] = True + with app.test_client() as client: + yield client + +def test_chatbot_route(client): + response = client.get('/') + assert response.status_code == 200 + assert 'text/html' in response.content_type + +def test_query_route(client): + # Test with valid query + response = client.post('/query', json={'query': 'Hello'}) + assert response.status_code == 200 + assert 'application/json' in response.content_type + assert 'answer' in response.get_json() + + # Test with no query + response = client.post('/query', json={}) + assert response.status_code == 400 + assert 'error' in response.get_json() + +def test_error_route(client): + response = client.get('/test_error') + assert response.status_code == 500 + + + + + + +import json +from unittest import mock +from chatgpt import app + +def test_chatbot(): + with app.test_client() as client: + response = client.get('/') + assert response.status_code == 200 + assert b'' in response.data + +@mock.patch('chatgpt.openai.ChatCompletion.create') +@mock.patch('chatgpt.simple_search') +def test_handle_query(mock_simple_search, mock_chat_completion): + mock_simple_search.return_value = 'Test context' + mock_chat_completion.return_value = mock.Mock(choices=[mock.Mock(message={'content': 'Test answer'})]) + + with app.test_client() as client: + response = client.post('/query', data=json.dumps({'query': 'Test query'}), content_type='application/json') + assert response.status_code == 200 + assert b'Test answer' in response.data + + response = client.post('/query', data=json.dumps({}), content_type='application/json') + assert response.status_code == 400 + assert b'No query 
provided' in response.data + + + +from unittest import mock +from flask import current_app +from chatgpt import app + +def test_handle_exception(): + with app.app_context(): + with app.test_client() as client: + with mock.patch.object(current_app.logger, 'error') as mock_error: + response = client.get('/test_error') + assert response.status_code == 500 + assert b'Internal Server Error' in response.data + print(mock_error.call_args) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..f2a3b03c9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,50 @@ +import pytest, sys, random, py, pytest, os +from xprocess import ProcessStarter +from lib.database_connection import DatabaseConnection +from chatgpt import app + +# This is a Pytest fixture. +# It creates an object that we can use in our tests. +# We will use it to create a database connection. +@pytest.fixture +def db_connection(): + conn = DatabaseConnection(test_mode=True) + conn.connect() + return conn + +# This fixture starts the test server and makes it available to the tests. +# You don't need to understand it in detail. +@pytest.fixture +def test_web_address(xprocess): + python_executable = sys.executable + app_file = py.path.local(__file__).dirpath("../app.py") + port = str(random.randint(4000, 4999)) + class Starter(ProcessStarter): + env = {"PORT": port, "APP_ENV": "test", **os.environ} + pattern = "Debugger PIN" + args = [python_executable, app_file] + + xprocess.ensure("flask_test_server", Starter) + + yield f"localhost:{port}" + + xprocess.getinfo("flask_test_server").terminate() + + +# Now, when we create a test, if we allow it to accept a parameter called +# `db_connection` or `test_web_address`, Pytest will automatically pass in the +# objects we created above. 
+ +# For example: + +# def test_something(db_connection, test_web_address): +# # db_connection is now available to us in this test. +# # test_web_address is also available to us in this test. + + +# We'll also create a fixture for the client we'll use to make test requests. +@pytest.fixture +def web_client(): + app.config['TESTING'] = True # This gets us better errors + with app.test_client() as client: + yield client \ No newline at end of file diff --git a/tests/test_data_hanlder.py b/tests/test_data_hanlder.py new file mode 100644 index 000000000..26990c437 --- /dev/null +++ b/tests/test_data_hanlder.py @@ -0,0 +1,39 @@ +import pytest +from your_module import read_file, read_pdf_combined, read_pdf_with_pdfplumber, read_text_file_sync +import os + +# Directory where test files are located +TEST_DIR = os.path.join(os.path.dirname(__file__), 'test_files') + +@pytest.fixture +def sample_pdf(): + return os.path.join(TEST_DIR, 'makers_brochure.pdf') + +@pytest.fixture +def sample_text(): + return os.path.join(TEST_DIR, 'data.txt') + +def test_read_file_pdf(sample_pdf): + content = read_file(sample_pdf) + assert content is not None + assert 'expected text in pdf' in content + +def test_read_file_text(sample_text): + content = read_file(sample_text) + assert content is not None + assert 'I have been with Makers since 2014' in content + +def test_read_pdf_combined(sample_pdf): + content = read_pdf_combined(sample_pdf) + assert content is not None + assert 'Why learn to code with Makers?' in content + +def test_read_pdf_with_pdfplumber(sample_pdf): + content = read_pdf_with_pdfplumber(sample_pdf) + assert content is not None + assert 'Why learn to code with Makers?' 
in content + +def test_read_text_file_sync(sample_text): + content = read_text_file_sync(sample_text) + assert content is not None + assert 'I have been with Makers since 2014' in content diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_database_connection.py b/tests/test_database_connection.py new file mode 100644 index 000000000..60466dec4 --- /dev/null +++ b/tests/test_database_connection.py @@ -0,0 +1,21 @@ +# This is an example of how to use the DatabaseConnection class + +""" +When I seed the database +I get some records back +""" +def test_database_connection(db_connection): + # Seed the database with some test data + db_connection.seed("seeds/database_connection.sql") + + # Insert a new record + db_connection.execute("INSERT INTO test_table (name) VALUES (%s)", ["second_record"]) + + # Retrieve all records + result = db_connection.execute("SELECT * FROM test_table") + + # Assert that the results are what we expect + assert result == [ + {"id": 1, "name": "first_record"}, + {"id": 2, "name": "second_record"} + ] diff --git a/tests/test_db_models.py b/tests/test_db_models.py new file mode 100644 index 000000000..c96950673 --- /dev/null +++ b/tests/test_db_models.py @@ -0,0 +1,44 @@ +# test_models.py +import pytest +from lib.db_models import create_app, db +from lib.db_models import ExtractedData +from lib.db_models import store_data, query_data + +@pytest.fixture +def test_app(): + app = create_app('testing') + with app.app_context(): + db.create_all() + yield app + db.session.remove() + db.drop_all() + +def test_extracted_data_model(test_app): + with test_app.app_context(): + new_data = ExtractedData(source='test', content='test content') + db.session.add(new_data) + db.session.commit() + + assert new_data.id is not None + assert new_data.source == 'test' + assert new_data.content == 'test content' + + + + +def test_store_data(test_app): + with test_app.app_context(): + 
store_data('web', 'sample content') + stored_data = ExtractedData.query.first() + assert stored_data is not None + assert stored_data.source == 'web' + assert stored_data.content == 'sample content' + +def test_query_data(test_app): + with test_app.app_context(): + store_data('web', 'Python is awesome') + store_data('file', 'Python and Flask') + results = query_data('Python') + assert len(results) == 2 + + diff --git a/tests/test_post_repository.py b/tests/test_post_repository.py new file mode 100644 index 000000000..fac1a39ba --- /dev/null +++ b/tests/test_post_repository.py @@ -0,0 +1,111 @@ +import pytest +from datetime import datetime +from lib.posts import Post +from lib.post_repository import PostRepository +from lib.database_connection import DatabaseConnection + +# Test case for adding a new post +def test_add_post(db_connection): + + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + + content = "Test Post Content" + user_id = 1 + post_id = repository.add_post(content, user_id) + + assert post_id is not None + +# Test case for getting a post by ID +def test_get_post_by_id(db_connection): + + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + + # Arrange: Add a post + content = "Test Post Content" + user_id = 1 + post_id = repository.add_post(content, user_id) + + # Act: Retrieve the post by ID + retrieved_post = repository.get_post_by_id(post_id) + + # Assert: Check post details + assert retrieved_post is not None + assert retrieved_post.id == post_id + assert retrieved_post.message == content + assert retrieved_post.user_id == user_id + +# Test case for getting all posts +def test_get_all_posts(db_connection): + + + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + + # Arrange: Add some test posts + repository.add_post("Post 1", 1) + repository.add_post("Post 2", 2) + + # Act: Retrieve all posts + all_posts = repository.get_all_posts() + + # Assert: Check 
the number of posts + assert len(all_posts) >= 2 + +# Test case for deleting a post +def test_delete_post(db_connection): + + + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + # Arrange: Add a post + content = "Test Post Content" + user_id = 1 + post_id = repository.add_post(content, user_id) + + # Act: Delete the post + repository.delete_post(post_id) + + # Assert: Attempt to get the deleted post + deleted_post = repository.get_post_by_id(post_id) + assert deleted_post is None + +# Test case for updating a post +def test_update_post(db_connection): + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + # Arrange: Add a post + content = "Test Post Content" + user_id = 1 + post_id = repository.add_post(content, user_id) + + # Arrange: Create an updated post + updated_content = "Updated Post Content" + updated_user_id = 2 + updated_post = Post(post_id, updated_content, updated_user_id, datetime.now().timestamp()) + + # Act: Update the post + repository.update_post(updated_post) + + # Assert: Check if the post is updated + retrieved_post = repository.get_post_by_id(post_id) + assert retrieved_post is not None + assert retrieved_post.id == post_id + assert retrieved_post.message == updated_content + assert retrieved_post.user_id == updated_user_id + +# Test case for getting posts by user ID +def test_get_posts_by_user_id(db_connection): + db_connection.seed('seeds/post.sql') + repository = PostRepository(db_connection) + # Arrange: Add posts for a user + user_id = 1 + repository.add_post("Post 1", user_id) + repository.add_post("Post 2", user_id) + + # Act: Retrieve posts by user ID + posts = repository.get_posts_by_user_id(user_id) + + # Assert: Check the number of posts + assert len(posts) >= 2 diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 000000000..a254f9d4d --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,20 @@ +# test_search.py +import pytest +from 
# tests/test_search.py — unit tests for lib.search.simple_search
# Fix: import from the real module (lib.search), not a nonexistent
# placeholder package.
from lib.search import simple_search

# Mock data: two documents, each containing "Python" exactly once, so the
# first document wins the tie and is the snippet source.
mock_data = {
    "doc1.txt": "Python is an interpreted, high-level and general-purpose programming language.",
    "doc2.txt": "Python's design philosophy emphasizes code readability with its notable use of significant whitespace."
}

def test_simple_search():
    query = "Python"
    # Fix: the old expected value stitched text from BOTH documents together,
    # which simple_search can never return. It returns a window of up to 30
    # characters either side of the first hit in the single best-matching
    # document — here the first 36 characters of doc1.
    expected_snippet = "Python is an interpreted, high-level"
    result = simple_search(query, data=mock_data)
    assert result == expected_snippet

def test_simple_search_no_result():
    # A query present in no document yields the fallback message.
    query = "nonexistent"
    result = simple_search(query, data=mock_data)
    assert result == "No relevant information found."