
Commit e23c458: "first time commit" (0 parents)

File tree: 1,720 files changed (+522487 -0 lines changed)


## Basic Scrapping.txt

+25

## Basic Scraping

# The scrapy shell is useful for testing XPath expressions:
scrapy shell <url>

...

scrapy shell http://news.ycombinator.com


response.xpath('//td[@class="title"]').extract()

response.xpath('//td[@class="title"]').extract()[0]

response.xpath('//td[@class="title"]/a').extract()[0]

response.xpath('//td[@class="title"]/a/text()').extract()[0]

response.xpath('//td[@class="title"]/a/@href').extract()[0]

response.xpath('//span[@class="yclinks"]/a[3]/@href').extract()  <-- library link at the bottom of Hacker News

response.xpath('//td[@class="subtext"]/a/@href').extract()[0]  <-- comment links

response.xpath('//td[@class="subtext"]/a/text()').extract()[0]  <-- comment link text
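To see how these pieces fit together, here is a sketch that pairs each title with its link inside the same scrapy shell session (it leans on the same 2014-era td[@class="title"] markup assumed above):

# Sketch: pair story titles with their links in the scrapy shell.
# Iterating avoids the IndexError a hard-coded [0] raises on empty results.
for row in response.xpath('//td[@class="title"]/a'):
    title = row.xpath('text()').extract()
    href = row.xpath('@href').extract()
    if title and href:
        print title[0], '->', href[0]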

### quick guide to heroku ###.txt

+35

### quick guide to heroku ###
heroku login -- enter your credentials

activate the virtualenv

pip freeze > requirements.txt -- so Heroku can understand
what your app needs

create a Procfile
add the line: web: python run.py and save it as Procfile, with no extension

update run.py (this adds the port; a sketch follows this guide)

git init
git add .
git commit -am "initial"

heroku create

git push heroku master
heroku ps:scale web=1

heroku ps <-- see if the app is serving

heroku open <-- launches the browser with the app

heroku logs <-- shows the logs

### in case of trouble ###
if the logs throw errors, make the changes, then
add and commit them,
and then push to heroku again.

Also watch out for errors on the rsa key
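For the "update run.py" step above, a minimal sketch of what the port change usually looks like, assuming run.py starts a Flask app (the commit does not include the actual run.py, so every name here is illustrative):

# run.py -- illustrative sketch only; assumes a Flask app
import os
from flask import Flask

app = Flask(__name__)

@app.route('/')
def index():
    return 'hello world'

if __name__ == '__main__':
    # Heroku supplies the port via the PORT environment variable;
    # bind to 0.0.0.0 so the dyno can route traffic to the process
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port)

The matching Procfile is the single line web: python run.py, exactly as the step above says.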

.gitignore

+20

#python specific
*.pyc

## generic files to ignore
*~
*.lock
*.DS_Store
*.swp
*.out
*.pdf

# Virtualenv
.Python
bin
lib
include

# Mac OS X custom attribute files
.DS_Store

.gitignorels

Whitespace-only changes.

README

+1

hello world shit sheep ship

README.md

+1

Exercises from Real Python book 2

api_for_rotten.py

+43

# GET data from Rotten Tomatoes, parse and write to database

import json, requests, sqlite3

API_KEY = 'm5549a8w6d7z9w7d4yugpezf'
url = requests.get("http://api.rottentomatoes.com/api/public/v1.0/" +
                   "lists/movies/in_theaters.json?apikey={}".format(API_KEY))

# convert data from feed to binary
binary = url.content
#print "This is binary\n"
#print binary

# decode the json feed
output = json.loads(binary)
#print "\nthis is output\n"
#print output

# grab the list of movies
movies = output["movies"]

with sqlite3.connect("movies.db") as connection:
    c = connection.cursor()

    # iterate through each movie and write to the database
    for movie in movies:
        c.execute("INSERT INTO new_movies VALUES (?,?,?,?,?,?,?)",
                  (movie["title"], movie["year"],
                   movie["mpaa_rating"],
                   movie["release_dates"]["theater"],
                   movie["runtime"], movie["ratings"]["critics_score"],
                   movie["ratings"]["audience_score"]))

    # retrieve data
    c.execute("SELECT * FROM new_movies ORDER BY title ASC")

    # fetchall() retrieves all records from the query
    rows = c.fetchall()

    # output the rows to the screen, row by row
    for r in rows:
        print "title " + str(r[0]), r[1], r[2], r[4], r[5], r[6]
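The INSERT assumes a new_movies table already exists in movies.db; the script never creates it. A one-off setup sketch (the column names are assumptions; only the count of seven columns is fixed by the INSERT above):

# create_movies_db.py -- hypothetical one-off setup script.
# Column names are assumptions matching the seven inserted values.
import sqlite3

with sqlite3.connect("movies.db") as connection:
    c = connection.cursor()
    c.execute("""CREATE TABLE new_movies
                 (title TEXT, year INTEGER, mpaa_rating TEXT,
                  theater_release TEXT, runtime INTEGER,
                  critics_score INTEGER, audience_score INTEGER)""")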

api_sentiments.py

+6

import requests

url = 'http://text-processing.com/api/sentiment/'
data = {'text': 'great'}
r = requests.post(url, data=data)
print r.content
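The endpoint answers with JSON rather than plain text; a follow-on sketch for decoding it (the "label" and "probability" field names are from memory of this API, so treat them as assumptions):

# Sketch: decode the sentiment response instead of printing raw bytes.
# Assumed JSON shape: {"label": ..., "probability": {...}}.
import json

result = json.loads(r.content)
print result.get("label"), result.get("probability")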

cars.json

+19

{
    "CARS": [
        {
            "MAKE": "Ford",
            "MODEL": "Focus",
            "COST": "15000"
        },
        {
            "MAKE": "Honda",
            "MODEL": "Civic",
            "COST": "20000"
        },
        {
            "MAKE": "Toyota",
            "MODEL": "Camry",
            "COST": "25000"
        },
        {
            "MAKE": "Honda",
            "MODEL": "Accord",
            "COST": "22000"
        }
    ]
}
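A quick sketch of reading this file back in Python (only the file name above is taken from the commit):

# Sketch: load cars.json and print each car.
import json

with open("cars.json") as f:
    cars = json.load(f)["CARS"]

for car in cars:
    print car["MAKE"], car["MODEL"], car["COST"]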

chapter 3/bs_scraping.py

+9

from bs4 import BeautifulSoup
from urllib2 import urlopen

URL = 'http://web2py.com'
htmlPage = urlopen(URL)
htmlText = htmlPage.read()
mySoup = BeautifulSoup(htmlText)

# print the href of every anchor on the page; href=True skips anchors
# that have no href attribute, which would otherwise raise a KeyError
for link in mySoup.find_all('a', href=True):
    print link["href"]
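Many of the hrefs on a page are relative; a small variant that turns them into absolute URLs with the standard library (urljoin lives in urlparse on Python 2):

# Sketch: same scrape, but resolving relative links to absolute URLs.
from bs4 import BeautifulSoup
from urllib2 import urlopen
from urlparse import urljoin

URL = 'http://web2py.com'
soup = BeautifulSoup(urlopen(URL).read())

for link in soup.find_all('a', href=True):
    print urljoin(URL, link["href"])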

chapter 3/clientp.py

+20

# Download stock quotes in CSV

import requests
import time

i = 0
stock_list = ["GOOG", "YHOO", "AOL"]

while i < 1:
    base_url = 'http://download.finance.yahoo.com/d/quotes.csv'
    # retrieve data from the web server
    for stock in stock_list:
        data = requests.get(base_url, params={'s': stock, 'f': 'sl1d1t1c1ohgv', 'e': '.csv'})

        # append the data to the csv
        with open("stocks.csv", "a") as code:
            code.write(data.content)

    # as written, the counter makes the while loop run exactly once
    i += 1

    # pause for 3 seconds
    time.sleep(3)
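A sketch for reading the accumulated quotes back with the csv module. The field order follows the f= format string above: symbol, last price, date, time, change, open, high, low, volume. These labels are my recollection of the old Yahoo API, so treat them as assumptions:

# Sketch: parse the downloaded quotes back out of stocks.csv.
import csv

with open("stocks.csv") as f:
    for row in csv.reader(f):
        if row:  # skip any blank lines
            print row[0], row[1]  # symbol, last trade price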

chapter 3/hackernews/hackernews/__init__.py

Whitespace-only changes.
+62

url,title
,
http://projecteuler.net/news,Project Euler Returns
,
http://www.technologyreview.com/news/527051/the-man-who-really-built-bitcoin/,"Gavin Andresen, the Most Powerful Person in the World of Bitcoin"
,
https://code.google.com/p/chromium/issues/detail?id=162757#c64,Blink won’t implement pointer events
,
http://techcrunch.com/2014/08/15/zen99/,"Zen99 (YC S14) Makes Life Easier for Freelancers with Finance, Insurance Tools"
,
http://www.practicalguidetomobileapps.com/,Show HN: Practical Guide to Mobile Apps Using Phonegap and Famo.us
,
http://www.bloomberg.com/news/2014-08-14/san-francisco-office-rents-seen-topping-manhattan-in-2015.html,San Francisco Office Rents Seen Topping Manhattan in 2015
,
http://s3.thinkaurelius.com/docs/titan/0.5.0/,Titan Distributed Graph Database 0.5.0
,
http://www.shellcheck.net/,ShellCheck – Online shell script analyzer
,
http://acko.net/files/pres/siggraph-2014-bof/online.html,MathBox 2
,
http://simonschreibt.de/gat/renderhell/ ,Render Hell 1.0
,
http://kivy.org/,Kivy – Open-source Python library for rapid development of applications
,
http://datatracker.ietf.org/doc/draft-kirsch-ietf-tcp-stealth/,TCP Stealth
,
http://www.math.vanderbilt.edu/~schectex/commerrs/,The most common errors in undergraduate mathematics
,
http://www.newyorker.com/magazine/2012/12/24/utopian-for-beginners,An amateur linguist loses control of the language he invented (2012)
,
http://limscentral.com/,Show HN: I created a Laboratory Customer Relationship Management Tool
,
https://bugs.ruby-lang.org/issues/10137,Incremental GC in Ruby MRI
,
http://martiancraft.com/blog/2014/08/an-unreal-decision/,An Unreal Decision
,
http://www.theatlantic.com/technology/archive/2014/08/why-email-will-never-die/375973/,Email Is Still the Best Thing on the Internet
,
http://sebastien-gabriel.com/designers-guide-to-dpi/,Designers Guide To DPI
,
http://www.slate.com/blogs/bad_astronomy/2014/08/14/portraits_in_uv_thomas_leveritt_video_of_faces_in_ultraviolet.html,The Faces of Ultraviolet
,
http://www.wired.com/2014/08/a-hair-salon-gurus-next-big-thing-ending-the-12b-tyranny-of-shampoo/,A Hair Salon Guru’s Next Big Thing: Ending Shampoo
,
http://techcrunch.com/2014/08/14/y-combinator-and-mithril-invest-in-helion-a-nuclear-fusion-startup/,"Y Combinator And Mithril Invest In Helion, A Nuclear Fusion Startup"
,
http://www.bbc.com/news/business-28756059,The 30-year-old health sector billionaire
,
http://www.theatlantic.com/features/archive/2014/08/the-future-of-college/375071/,The Future of College?
,
http://www.theatlantic.com/technology/archive/2014/08/advertising-is-the-internets-original-sin/376041/,It's not too late to ditch the ad-based business model and build a better web
,
https://github.com/siddontang/mixer,Mixer – A MySQL Proxy powered by Go
,
http://bonsaiden.github.io/Tuff.gb/,Show HN: I'm building a game for the Nintendo GameBoy
,
https://itunes.apple.com/us/app/biodigital-human-anatomy-health/id771825569,BioDigital: A 3D Medical Anatomy App
,
https://careers.stackoverflow.com/jobs/65030/full-stack-junior-developer-unbabel,Unbabel (YC W14) is looking for a junior developer in Portugal
,
http://henrysmith.org/blog/2014/08/04/not-planning-any-nuclear-attacks/,I am not planning any nuclear attacks
news?p=2,More

+15

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy.item import Item, Field


class HackernewsItem(Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    title = Field()
    url = Field()

+11

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


class HackernewsPipeline(object):
    def process_item(self, item, spider):
        return item
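As the comment above says, the pipeline does nothing until it is registered in the ITEM_PIPELINES setting. A sketch of the entry for settings.py (the 300 is an arbitrary priority within Scrapy's 0-1000 range):

# settings.py -- register the pipeline; lower numbers run earlier
ITEM_PIPELINES = {
    'hackernews.pipelines.HackernewsPipeline': 300,
}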
+17

# -*- coding: utf-8 -*-

# Scrapy settings for hackernews project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#

BOT_NAME = 'hackernews'

SPIDER_MODULES = ['hackernews.spiders']
NEWSPIDER_MODULE = 'hackernews.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'hackernews (+http://www.yourdomain.com)'
+4

# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
+26

# spider.py

from scrapy.spider import BaseSpider
from hackernews.items import HackernewsItem


class MySpider(BaseSpider):
    # Naming the spider
    name = "wiki"

    # allowed domains to scrape
    allowed_domains = ["en.wikipedia.org"]
    start_urls = ["http://en.wikipedia.org/wiki/Category:2014_films"]

    def parse(self, response):
        titles = response.xpath('//tr[@style="vertical-align: top;"]//li')
        items = []
        for title in titles:
            # the original instantiated WikipediaItem, which is never imported
            # or defined; HackernewsItem is the item this project declares
            item = HackernewsItem()
            item["title"] = title.xpath("a/text()").extract()
            item["url"] = title.xpath("a/@href").extract()
            items.append(item)
        return items
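To try the spider, the standard Scrapy invocation from the project root applies; the output file name here is just an example:

scrapy crawl wiki -o films.csv -t csv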

chapter 3/hackernews/scrapy.cfg

+11

# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# http://doc.scrapy.org/en/latest/topics/scrapyd.html

[settings]
default = hackernews.settings

[deploy]
#url = http://localhost:6800/
project = hackernews

chapter 3/socrata/project.db

324 KB
Binary file not shown.

chapter 3/socrata/scrapy.cfg

+11

# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# http://doc.scrapy.org/en/latest/topics/scrapyd.html

[settings]
default = socrata.settings

[deploy]
#url = http://localhost:6800/
project = socrata

chapter 3/socrata/socrata/__init__.py

Whitespace-only changes.

chapter 3/socrata/socrata/db.py

+5

import sqlite3

conn = sqlite3.connect("project.db")
cursor = conn.cursor()
cursor.execute("""CREATE TABLE data (text TEXT, url TEXT, views TEXT)""")
conn.commit()  # make sure the schema change is flushed to project.db
conn.close()
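A small follow-on sketch of writing a row and reading it back (the sample values are placeholders; only the table and column layout come from the commit):

# Sketch: insert one placeholder record into the data table and read it back.
import sqlite3

with sqlite3.connect("project.db") as conn:
    c = conn.cursor()
    c.execute("INSERT INTO data VALUES (?, ?, ?)",
              ("sample dataset title", "http://example.com/dataset", "1234"))
    for row in c.execute("SELECT * FROM data"):
        print row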
