all files added

kaustubhgupta · kaustubhgupta · commit 9c9bb90ff37b · 2020-09-10T18:39:57.000+05:30
diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/.gitignore b/System-Automation-Scripts/PDF_Tables_To_CSV/.gitignore
@@ -0,0 +1,9 @@
+# Vscode files
+.vscode
+
+# Sample Files
+sample.pdf
+sample2.pdf
+
+# Python
+__pycache__
diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/README.md b/System-Automation-Scripts/PDF_Tables_To_CSV/README.md
@@ -0,0 +1,22 @@
+# PDF to CSV
+This script converts the tables in a PDF file into CSV files. Each CSV file holds one table from the PDF, so the number of CSV files produced equals the number of tables found.
+
+# Requirements
+`pip install tabula-py pandas`
+
+# How to use?
+Just use the following command to run the script:
+
+`python app.py location_of_pdf pages`
+
+Pages have two options:
+- 'all' will extract tables from whole PDF
+- a specific page number (e.g. 1, 2, 54) will extract the tables from that page only
+
+Example:
+- `python app.py sample.pdf all`
+- `python app.py sample2.pdf 45`
+
+# Preview
+
+![](preview.gif)
diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/app.py b/System-Automation-Scripts/PDF_Tables_To_CSV/app.py
@@ -0,0 +1,30 @@
+import tabula
+import pandas as pd
+import sys
+
+
+def extract(path, number_pages):
+    """Save every table found in a PDF as its own CSV file.
+
+    Args:
+        path: Location of the PDF file to read.
+        number_pages: Pages to scan, passed unchanged to tabula's ``pages``
+            option (for example ``'all'`` or ``'45'``).
+
+    Each table is written as ``Table- <n>.csv`` (a relative path, so it
+    lands in the current working directory), numbered from 1 in list order.
+    """
+    tables = tabula.read_pdf(path, multiple_tables=True, pages=number_pages)
+    if not tables:
+        print("No tables found !")
+        return
+    for count, table in enumerate(tables, start=1):
+        print(f"Saving file -{count}")
+        table.to_csv(f'Table- {count}.csv')
+    print("All tables saved as seperate files !")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        sys.exit("Usage: python app.py location_of_pdf pages")
+    extract(sys.argv[1], sys.argv[2])
diff --git a/System-Automation-Scripts/PDF_Tables_To_CSV/preview.gif b/System-Automation-Scripts/PDF_Tables_To_CSV/preview.gif