Skip to content

Ported for Python3 #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions fixtures/data.outline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"collection": "nodes",
"map": [
[
"message_Revised",
"message.Revised"
],
[
"message_original",
"message.original"
],
[
"source_author",
"source.author"
]
]
}
29 changes: 29 additions & 0 deletions fixtures/different_keys_per_row.outline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"map": [
[
"this",
"this"
],
[
"tags_0",
"tags.0"
],
[
"tags_1",
"tags.1"
],
[
"tags_2",
"tags.2"
],
[
"that",
"that"
],
[
"theother",
"theother"
]
],
"collection": "nodes"
}
16 changes: 16 additions & 0 deletions fixtures/line_delimited.outline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"map": [
[
"source_author",
"source.author"
],
[
"message_original",
"message.original"
],
[
"message_Revised",
"message.Revised"
]
]
}
4 changes: 4 additions & 0 deletions fixtures/nested_json_frt.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"name","binomial name","major_producers_0","major_producers_1","major_producers_2","nutrition_carbohydrates","nutrition_fat","nutrition_protein"
"Apple","Malus domestica","China","United States","Turkey","13.81g","0.17g","0.26g"
"Orange","Citrus x sinensis","Brazil","United States","India","11.75g","0.12g","0.94g"
"Mango","Mangifera indica","India","China","Thailand","15g","0.38g","0.82g"
46 changes: 46 additions & 0 deletions fixtures/nested_json_frt.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"fruit": [
{
"name": "Apple",
"binomial name": "Malus domestica",
"major_producers": [
"China",
"United States",
"Turkey"
],
"nutrition": {
"carbohydrates": "13.81g",
"fat": "0.17g",
"protein": "0.26g"
}
},
{
"name": "Orange",
"binomial name": "Citrus x sinensis",
"major_producers": [
"Brazil",
"United States",
"India"
],
"nutrition": {
"carbohydrates": "11.75g",
"fat": "0.12g",
"protein": "0.94g"
}
},
{
"name": "Mango",
"binomial name": "Mangifera indica",
"major_producers": [
"India",
"China",
"Thailand"
],
"nutrition": {
"carbohydrates": "15g",
"fat": "0.38g",
"protein": "0.82g"
}
}
]
}
37 changes: 37 additions & 0 deletions fixtures/nested_json_frt.outline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"map": [
[
"name",
"name"
],
[
"binomial name",
"binomial name"
],
[
"major_producers_0",
"major_producers.0"
],
[
"major_producers_1",
"major_producers.1"
],
[
"major_producers_2",
"major_producers.2"
],
[
"nutrition_carbohydrates",
"nutrition.carbohydrates"
],
[
"nutrition_fat",
"nutrition.fat"
],
[
"nutrition_protein",
"nutrition.protein"
]
],
"collection": "fruit"
}
24 changes: 17 additions & 7 deletions fixtures/outline.json
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
{
"map": [
["author", "source.author"],
["message", "message.original"]
],
"collection": "nodes"
}
{
"collection": "nodes",
"map": [
[
"message_Revised",
"message.Revised"
],
[
"message_original",
"message.original"
],
[
"source_author",
"source.author"
]
]
}
6 changes: 4 additions & 2 deletions gen_outline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def key_paths(d):
def helper(path, x):
if isinstance(x, dict):
for k, v in x.iteritems():
for k, v in x.items():
for ret in helper(path + [k], v):
yield ret
elif isinstance(x, list):
Expand Down Expand Up @@ -40,6 +40,8 @@ def key_map_to_list(key_map):
# We convert to strings *after* sorting so that array indices come out
# in the correct order.
return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
# To keep keys in the order they appear in the JSON (i.e. skip sorting), use:
# return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]

def make_outline(json_file, each_line, collection_key):
if each_line:
Expand Down Expand Up @@ -77,7 +79,7 @@ def main():
if outfile is None:
fileName, fileExtension = os.path.splitext(args.json_file.name)
outfile = fileName + '.outline.json'

# Sort the keys so the output file always has the same field order.
with open(outfile, 'w') as f:
json.dump(outline, f, indent=2, sort_keys=True)

Expand Down
23 changes: 12 additions & 11 deletions json2csv.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/usr/bin/env python

try:
import unicodecsv as csv
except ImportError:
import csv

# Python 3 strings are Unicode by default, so unicodecsv is no longer needed.
import csv
import json
import operator
import os
from collections import OrderedDict
import logging
import argparse
# reduce() moved to the functools module in Python 3.
import functools as ft

logging.basicConfig(level=logging.DEBUG)

Expand Down Expand Up @@ -59,7 +59,7 @@ def process_each(self, data):
data = data[self.collection]

for d in data:
logging.info(d)
#logging.info(d)
self.rows.append(self.process_row(d))

def process_row(self, item):
Expand All @@ -69,7 +69,7 @@ def process_row(self, item):

for header, keys in self.key_map.items():
try:
row[header] = reduce(operator.getitem, keys, item)
row[header] = ft.reduce(operator.getitem, keys, item)
except (KeyError, IndexError, TypeError):
row[header] = None

Expand All @@ -88,7 +88,7 @@ def make_string(self, item):
elif isinstance(item, dict):
return self.DICT_OPEN + self.DICT_SEP_CHAR.join([self.KEY_VAL_CHAR.join([k, self.make_string(val)]) for k, val in item.items()]) + self.DICT_CLOSE
else:
return unicode(item)
return item

def write_csv(self, filename='output.csv', make_strings=False):
"""Write the processed rows to the given filename
Expand All @@ -99,8 +99,9 @@ def write_csv(self, filename='output.csv', make_strings=False):
out = self.make_strings()
else:
out = self.rows
with open(filename, 'wb+') as f:
writer = csv.DictWriter(f, self.key_map.keys())
# Open in text write mode and use the 'unix' dialect so all fields are quoted.
with open(filename, 'w') as f:
writer = csv.DictWriter(f, self.key_map.keys(), dialect='unix')
writer.writeheader()
writer.writerows(out)

Expand All @@ -119,7 +120,6 @@ def process_each(self, data, collection=None):


def init_parser():
import argparse
parser = argparse.ArgumentParser(description="Converts JSON to CSV")
parser.add_argument('json_file', type=argparse.FileType('r'),
help="Path to JSON data file to load")
Expand Down Expand Up @@ -153,3 +153,4 @@ def init_parser():
outfile = fileName + '.csv'

loader.write_csv(filename=outfile, make_strings=args.strings)

1 change: 0 additions & 1 deletion requirements.txt

This file was deleted.

20 changes: 10 additions & 10 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def test_process_row(self):
self.assertIn('id', row.keys())
self.assertIn('count', row.keys())

self.assertEquals(row['id'], 'Someone')
self.assertEquals(row['count'], 1)
self.assertEqual(row['id'], 'Someone')
self.assertEqual(row['count'], 1)

def test_process_row_nested_data(self):
"""Ensure that nested keys (with . notation) are processed"""
Expand All @@ -42,8 +42,8 @@ def test_process_row_nested_data(self):
self.assertIn('author', row.keys())
self.assertIn('message', row.keys())

self.assertEquals(row['author'], 'Someone')
self.assertEquals(row['message'], 'Hey!')
self.assertEqual(row['author'], 'Someone')
self.assertEqual(row['message'], 'Hey!')

def test_process_row_array_index(self):
"""Ensure that array indices are properly handled as part of the dot notation"""
Expand All @@ -56,14 +56,14 @@ def test_process_each(self):
test_data = json.loads('{"result":[{"_id" : "Someone","count" : 1}]}')
loader.process_each(test_data)

self.assertEquals(len(loader.rows), 1)
self.assertEqual(len(loader.rows), 1)
row = loader.rows[0]
self.assertIs(type(row), dict)
self.assertIn('id', row.keys())
self.assertIn('count', row.keys())

self.assertEquals(row['id'], 'Someone')
self.assertEquals(row['count'], 1)
self.assertEqual(row['id'], 'Someone')
self.assertEqual(row['count'], 1)

def test_process_each_optional_key(self):
"""Ensure a key that is not always present won't prevent data extraction
Expand All @@ -75,12 +75,12 @@ def test_process_each_optional_key(self):
test_data = json.loads('''[
{"_id": "Someone","count": 1, "tags": ["super"]},
{"_id": "Another", "tags": []}]''')
self.assertEquals(len(test_data), 2)
self.assertEqual(len(test_data), 2)
loader.process_each(test_data)

self.assertEquals(len(loader.rows), 2)
self.assertEqual(len(loader.rows), 2)
second_row = loader.rows[1]
self.assertEquals(second_row['id'], 'Another')
self.assertEqual(second_row['id'], 'Another')
# works for missing dict keys
self.assertIsNone(second_row['count'])
# and missing list indices
Expand Down