From 8b29d18259a2ded1074c89607721dd2186eeb451 Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 19:53:40 +0530
Subject: [PATCH 1/7] No new modules needed on python3+

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 4e2f0a7..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-unicodecsv==0.9.0

From 59d5a2d6e9466cd90f6eddf332d744b073ebc9da Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 19:58:41 +0530
Subject: [PATCH 2/7] changes for python3, using csv and unix dialect

---
 json2csv.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/json2csv.py b/json2csv.py
index e70ef12..685a5f4 100755
--- a/json2csv.py
+++ b/json2csv.py
@@ -1,15 +1,15 @@
 #!/usr/bin/env python
 
-try:
-    import unicodecsv as csv
-except ImportError:
-    import csv
-
+#python3 is by default unicode
+import csv
 import json
 import operator
 import os
 from collections import OrderedDict
 import logging
+import argparse
+#reduce is part of functools for py3
+import functools as ft
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -59,7 +59,7 @@ def process_each(self, data):
             data = data[self.collection]
 
         for d in data:
-            logging.info(d)
+            #logging.info(d)
             self.rows.append(self.process_row(d))
 
     def process_row(self, item):
@@ -69,7 +69,7 @@ def process_row(self, item):
 
         for header, keys in self.key_map.items():
             try:
-                row[header] = reduce(operator.getitem, keys, item)
+                row[header] = ft.reduce(operator.getitem, keys, item)
             except (KeyError, IndexError, TypeError):
                 row[header] = None
 
@@ -88,7 +88,7 @@ def make_string(self, item):
         elif isinstance(item, dict):
             return self.DICT_OPEN + self.DICT_SEP_CHAR.join([self.KEY_VAL_CHAR.join([k, self.make_string(val)]) for k, val in item.items()]) + self.DICT_CLOSE
         else:
-            return unicode(item)
+            return item
 
     def write_csv(self, filename='output.csv', make_strings=False):
         """Write the processed rows to the given filename
@@ -99,8 +99,9 @@ def write_csv(self, filename='output.csv', make_strings=False):
             out = self.make_strings()
         else:
             out = self.rows
-        with open(filename, 'wb+') as f:
-            writer = csv.DictWriter(f, self.key_map.keys())
+        #opening with write mode only and specifying unix dialect to quote all fields
+        with open(filename, 'w') as f:
+            writer = csv.DictWriter(f, self.key_map.keys(), dialect='unix')
             writer.writeheader()
             writer.writerows(out)
 
@@ -119,7 +120,6 @@ def process_each(self, data, collection=None):
 
 
 def init_parser():
-    import argparse
     parser = argparse.ArgumentParser(description="Converts JSON to CSV")
     parser.add_argument('json_file', type=argparse.FileType('r'),
                         help="Path to JSON data file to load")
@@ -153,3 +153,4 @@ def init_parser():
         outfile = fileName + '.csv'
 
     loader.write_csv(filename=outfile, make_strings=args.strings)
+

From 48e7a30fc748deb4121ea3b6c60cb3fe00addd9b Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 19:59:56 +0530
Subject: [PATCH 3/7] py3 changes and removed json key sorting

---
 get_outline.py |  88 ++++++++++++++++++++++
 tests.py       | 193 -------------------------------------------------
 2 files changed, 88 insertions(+), 193 deletions(-)
 create mode 100644 get_outline.py

diff --git a/get_outline.py b/get_outline.py
new file mode 100644
index 0000000..3067d6b
--- /dev/null
+++ b/get_outline.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+import json
+import os, os.path
+
+def key_paths(d):
+    def helper(path, x):
+        if isinstance(x, dict):
+            for k, v in x.items():
+                for ret in helper(path + [k], v):
+                    yield ret
+        elif isinstance(x, list):
+            for i, item in enumerate(x):
+                for ret in helper(path + [i], item):
+                    yield ret
+        else:
+            yield path
+    return helper([], d)
+
+def line_iter(f):
+    for line in f:
+        yield json.loads(line)
+
+def coll_iter(f, coll_key):
+    data = json.load(f)
+    for obj in data[coll_key]:
+        yield obj
+
+def gather_key_map(iterator):
+    key_map = {}
+    for d in iterator:
+        for path in key_paths(d):
+            key_map[tuple(path)] = True
+    return key_map
+
+def path_join(path, sep='.'):
+    return sep.join(str(k) for k in path)
+
+def key_map_to_list(key_map):
+    # We convert to strings *after* sorting so that array indices come out
+    # in the correct order.
+    # return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
+    # to get keys in the order defined in json ,not doing any sort
+    return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]
+
+def make_outline(json_file, each_line, collection_key):
+    if each_line:
+        iterator = line_iter(json_file)
+    else:
+        iterator = coll_iter(json_file, collection_key)
+
+    key_map = gather_key_map(iterator)
+    outline = {'map': key_map_to_list(key_map)}
+    if collection_key:
+        outline['collection'] = collection_key
+
+    return outline
+
+def init_parser():
+    import argparse
+    parser = argparse.ArgumentParser(description="Generate an outline file for json2csv.py")
+    parser.add_argument('json_file', type=argparse.FileType('r'),
+                        help="Path to JSON data file to analyze")
+    parser.add_argument('-o', '--output-file', type=str, default=None,
+                        help="Path to outline file to output")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('-e', '--each-line', action="store_true", default=False,
+                       help="Process each line of JSON file separately")
+    group.add_argument('-c', '--collection', type=str, default=None,
+                       help="Key in JSON of array to process", metavar="KEY")
+
+    return parser
+
+def main():
+    parser = init_parser()
+    args = parser.parse_args()
+    outline = make_outline(args.json_file, args.each_line, args.collection)
+    outfile = args.output_file
+    if outfile is None:
+        fileName, fileExtension = os.path.splitext(args.json_file.name)
+        outfile = fileName + '.outline.json'
+#not soring the json keys
+    with open(outfile, 'w') as f:
+        json.dump(outline, f, indent=2, sort_keys=False)
+
+if __name__ == '__main__':
+    main()
+
diff --git a/tests.py b/tests.py
index 821e4c3..e69de29 100644
--- a/tests.py
+++ b/tests.py
@@ -1,193 +0,0 @@
-import unittest
-import json
-from json2csv import Json2Csv, MultiLineJson2Csv
-from gen_outline import make_outline
-
-
-class TestJson2Csv(unittest.TestCase):
-
-    def test_init(self):
-        outline = {'map': [['some_header', 'some_key']]}
-        loader = Json2Csv(outline)
-        self.assertIn('some_header', loader.key_map)
-
-        self.assertRaises(ValueError, Json2Csv, None)
-
-        self.assertRaises(ValueError, Json2Csv, {})
-
-    def test_process_row(self):
-        """Given a valid key-map and data, it should return a valid row"""
-        outline = {'map': [['id', '_id'], ['count', 'count']]}
-        loader = Json2Csv(outline)
-        test_data = json.loads('{"_id" : "Someone","count" : 1}')
-        row = loader.process_row(test_data)
-
-        self.assertIs(type(row), dict)
-        self.assertIn('id', row.keys())
-        self.assertIn('count', row.keys())
-
-        self.assertEquals(row['id'], 'Someone')
-        self.assertEquals(row['count'], 1)
-
-    def test_process_row_nested_data(self):
-        """Ensure that nested keys (with . notation) are processed"""
-        key_map = {"map": [['author', 'source.author'], ['message', 'message.original']]}
-        loader = Json2Csv(key_map)
-        test_data = json.loads(
-            '{"source": {"author": "Someone"}, "message": {"original": "Hey!", "Revised": "Hey yo!"}}'
-        )
-        row = loader.process_row(test_data)
-
-        self.assertIs(type(row), dict)
-        self.assertIn('author', row.keys())
-        self.assertIn('message', row.keys())
-
-        self.assertEquals(row['author'], 'Someone')
-        self.assertEquals(row['message'], 'Hey!')
-
-    def test_process_row_array_index(self):
-        """Ensure that array indices are properly handled as part of the dot notation"""
-        pass
-
-    def test_process_each(self):
-        outline = {'map': [['id', '_id'], ['count', 'count']], 'collection': 'result'}
-        loader = Json2Csv(outline)
-
-        test_data = json.loads('{"result":[{"_id" : "Someone","count" : 1}]}')
-        loader.process_each(test_data)
-
-        self.assertEquals(len(loader.rows), 1)
-        row = loader.rows[0]
-        self.assertIs(type(row), dict)
-        self.assertIn('id', row.keys())
-        self.assertIn('count', row.keys())
-
-        self.assertEquals(row['id'], 'Someone')
-        self.assertEquals(row['count'], 1)
-
-    def test_process_each_optional_key(self):
-        """Ensure a key that is not always present won't prevent data extraction
-        Where the data is missing, None is returned
-        """
-        outline = {'map': [['id', '_id'], ['count', 'count'], ['tags_0', 'tags.0']]}
-        loader = Json2Csv(outline)
-
-        test_data = json.loads('''[
-          {"_id": "Someone","count": 1, "tags": ["super"]},
-          {"_id": "Another", "tags": []}]''')
-        self.assertEquals(len(test_data), 2)
-        loader.process_each(test_data)
-
-        self.assertEquals(len(loader.rows), 2)
-        second_row = loader.rows[1]
-        self.assertEquals(second_row['id'], 'Another')
-        # works for missing dict keys
-        self.assertIsNone(second_row['count'])
-        # and missing list indices
-        self.assertIsNone(second_row['tags_0'])
-
-    def test_load_json(self):
-        outline = {"map": [['author', 'source.author'], ['message', 'message.original']], "collection": "nodes"}
-        loader = Json2Csv(outline)
-        with open('fixtures/data.json') as f:
-            loader.load(f)
-
-        first_row = loader.rows[0]
-        self.assertEqual(first_row['author'], 'Someone')
-        second_row = loader.rows[1]
-        self.assertEqual(second_row['author'], 'Another')
-        third_row = loader.rows[2]
-        self.assertEqual(third_row['author'], 'Me too')
-
-    def test_load_bare_json(self):
-        outline = {"map": [['author', 'source.author'], ['message', 'message.original']]}
-        loader = Json2Csv(outline)
-        with open('fixtures/bare_data.json') as f:
-            loader.load(f)
-
-        first_row = loader.rows[0]
-        self.assertEqual(first_row['author'], 'Someone')
-        second_row = loader.rows[1]
-        self.assertEqual(second_row['author'], 'Another')
-        third_row = loader.rows[2]
-        self.assertEqual(third_row['author'], 'Me too')
-
-    def test_write_csv(self):
-        pass
-
-
-class TestMultiLineJson2Csv(unittest.TestCase):
-
-    def test_line_delimited(self):
-        outline = {"map": [['author', 'source.author'], ['message', 'message.original']]}
-        loader = MultiLineJson2Csv(outline)
-        with open('fixtures/line_delimited.json') as f:
-            loader.load(f)
-
-        first_row = loader.rows[0]
-        self.assertEqual(first_row['author'], 'Someone')
-        second_row = loader.rows[1]
-        self.assertEqual(second_row['author'], 'Another')
-        third_row = loader.rows[2]
-        self.assertEqual(third_row['author'], 'Me too')
-
-
-class TestGenOutline(unittest.TestCase):
-
-    def test_basic(self):
-        with open('fixtures/data.json') as json_file:
-            outline = make_outline(json_file, False, 'nodes')
-            expected = {
-                'collection': 'nodes',
-                'map': [
-                    ('message_Revised', 'message.Revised'),
-                    ('message_original', 'message.original'),
-                    ('source_author', 'source.author'),
-                ]
-            }
-            self.assertEqual(outline, expected)
-
-    def test_deeply_nested(self):
-        with open('fixtures/deeply_nested.json') as json_file:
-            outline = make_outline(json_file, False, 'nodes')
-            expected = {
-                'collection': 'nodes',
-                'map': [
-                    ('one_0_two_0_three_0', 'one.0.two.0.three.0'),
-                    ('one_0_two_0_three_1', 'one.0.two.0.three.1'),
-                    ('one_0_two_0_three_2', 'one.0.two.0.three.2'),
-                    ('one_0_two_1_three_0', 'one.0.two.1.three.0'),
-                    ('one_0_two_1_three_1', 'one.0.two.1.three.1'),
-                    ('one_0_two_1_three_2', 'one.0.two.1.three.2'),
-                ]
-            }
-            self.assertEqual(outline, expected)
-
-    def test_different_keys_per_row(self):
-        "Outline should contain the union of the keys."
-        with open('fixtures/different_keys_per_row.json') as json_file:
-            outline = make_outline(json_file, False, 'nodes')
-            expected = {
-                'collection': 'nodes',
-                'map': [
-                    ('tags_0', 'tags.0'),
-                    ('tags_1', 'tags.1'),
-                    ('tags_2', 'tags.2'),
-                    ('that', 'that'),
-                    ('theother', 'theother'),
-                    ('this', 'this'),
-                ]
-            }
-            self.assertEqual(outline, expected)
-
-    def test_line_delimited(self):
-        with open('fixtures/line_delimited.json') as json_file:
-            outline = make_outline(json_file, True, None)
-            expected = {
-                'map': [
-                    ('message_Revised', 'message.Revised'),
-                    ('message_original', 'message.original'),
-                    ('source_author', 'source.author'),
-                ]
-            }
-            self.assertEqual(outline, expected)

From 06b1cd63112fc9358c237935988cadd4ae076559 Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 20:00:41 +0530
Subject: [PATCH 4/7] Restore tests.py with assertEqual and unsorted outline order

---
 tests.py | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)

diff --git a/tests.py b/tests.py
index e69de29..91b9e44 100644
--- a/tests.py
+++ b/tests.py
@@ -0,0 +1,194 @@
+import unittest
+import json
+from json2csv import Json2Csv, MultiLineJson2Csv
+from gen_outline import make_outline
+
+
+class TestJson2Csv(unittest.TestCase):
+
+    def test_init(self):
+        outline = {'map': [['some_header', 'some_key']]}
+        loader = Json2Csv(outline)
+        self.assertIn('some_header', loader.key_map)
+
+        self.assertRaises(ValueError, Json2Csv, None)
+
+        self.assertRaises(ValueError, Json2Csv, {})
+
+    def test_process_row(self):
+        """Given a valid key-map and data, it should return a valid row"""
+        outline = {'map': [['id', '_id'], ['count', 'count']]}
+        loader = Json2Csv(outline)
+        test_data = json.loads('{"_id" : "Someone","count" : 1}')
+        row = loader.process_row(test_data)
+
+        self.assertIs(type(row), dict)
+        self.assertIn('id', row.keys())
+        self.assertIn('count', row.keys())
+
+        self.assertEqual(row['id'], 'Someone')
+        self.assertEqual(row['count'], 1)
+
+    def test_process_row_nested_data(self):
+        """Ensure that nested keys (with . notation) are processed"""
+        key_map = {"map": [['author', 'source.author'], ['message', 'message.original']]}
+        loader = Json2Csv(key_map)
+        test_data = json.loads(
+            '{"source": {"author": "Someone"}, "message": {"original": "Hey!", "Revised": "Hey yo!"}}'
+        )
+        row = loader.process_row(test_data)
+
+        self.assertIs(type(row), dict)
+        self.assertIn('author', row.keys())
+        self.assertIn('message', row.keys())
+
+        self.assertEqual(row['author'], 'Someone')
+        self.assertEqual(row['message'], 'Hey!')
+
+    def test_process_row_array_index(self):
+        """Ensure that array indices are properly handled as part of the dot notation"""
+        pass
+
+    def test_process_each(self):
+        outline = {'map': [['id', '_id'], ['count', 'count']], 'collection': 'result'}
+        loader = Json2Csv(outline)
+
+        test_data = json.loads('{"result":[{"_id" : "Someone","count" : 1}]}')
+        loader.process_each(test_data)
+
+        self.assertEqual(len(loader.rows), 1)
+        row = loader.rows[0]
+        self.assertIs(type(row), dict)
+        self.assertIn('id', row.keys())
+        self.assertIn('count', row.keys())
+
+        self.assertEqual(row['id'], 'Someone')
+        self.assertEqual(row['count'], 1)
+
+    def test_process_each_optional_key(self):
+        """Ensure a key that is not always present won't prevent data extraction
+        Where the data is missing, None is returned
+        """
+        outline = {'map': [['id', '_id'], ['count', 'count'], ['tags_0', 'tags.0']]}
+        loader = Json2Csv(outline)
+
+        test_data = json.loads('''[
+          {"_id": "Someone","count": 1, "tags": ["super"]},
+          {"_id": "Another", "tags": []}]''')
+        self.assertEqual(len(test_data), 2)
+        loader.process_each(test_data)
+
+        self.assertEqual(len(loader.rows), 2)
+        second_row = loader.rows[1]
+        self.assertEqual(second_row['id'], 'Another')
+        # works for missing dict keys
+        self.assertIsNone(second_row['count'])
+        # and missing list indices
+        self.assertIsNone(second_row['tags_0'])
+
+    def test_load_json(self):
+        outline = {"map": [['author', 'source.author'], ['message', 'message.original']], "collection": "nodes"}
+        loader = Json2Csv(outline)
+        with open('fixtures/data.json') as f:
+            loader.load(f)
+
+        first_row = loader.rows[0]
+        self.assertEqual(first_row['author'], 'Someone')
+        second_row = loader.rows[1]
+        self.assertEqual(second_row['author'], 'Another')
+        third_row = loader.rows[2]
+        self.assertEqual(third_row['author'], 'Me too')
+
+    def test_load_bare_json(self):
+        outline = {"map": [['author', 'source.author'], ['message', 'message.original']]}
+        loader = Json2Csv(outline)
+        with open('fixtures/bare_data.json') as f:
+            loader.load(f)
+
+        first_row = loader.rows[0]
+        self.assertEqual(first_row['author'], 'Someone')
+        second_row = loader.rows[1]
+        self.assertEqual(second_row['author'], 'Another')
+        third_row = loader.rows[2]
+        self.assertEqual(third_row['author'], 'Me too')
+
+    def test_write_csv(self):
+        pass
+
+
+class TestMultiLineJson2Csv(unittest.TestCase):
+
+    def test_line_delimited(self):
+        outline = {"map": [['author', 'source.author'], ['message', 'message.original']]}
+        loader = MultiLineJson2Csv(outline)
+        with open('fixtures/line_delimited.json') as f:
+            loader.load(f)
+
+        first_row = loader.rows[0]
+        self.assertEqual(first_row['author'], 'Someone')
+        second_row = loader.rows[1]
+        self.assertEqual(second_row['author'], 'Another')
+        third_row = loader.rows[2]
+        self.assertEqual(third_row['author'], 'Me too')
+
+
+class TestGenOutline(unittest.TestCase):
+
+    def test_basic(self):
+        with open('fixtures/data.json') as json_file:
+            outline = make_outline(json_file, False, 'nodes')
+            expected = {
+                'collection': 'nodes',
+                'map': [
+                    ('source_author', 'source.author'),
+                    ('message_original', 'message.original'),
+                    ('message_Revised', 'message.Revised'),
+                ]
+            }
+            self.assertEqual(outline, expected)
+
+    def test_deeply_nested(self):
+        with open('fixtures/deeply_nested.json') as json_file:
+            outline = make_outline(json_file, False, 'nodes')
+            expected = {
+                'collection': 'nodes',
+                'map': [
+                    ('one_0_two_0_three_0', 'one.0.two.0.three.0'),
+                    ('one_0_two_0_three_1', 'one.0.two.0.three.1'),
+                    ('one_0_two_0_three_2', 'one.0.two.0.three.2'),
+                    ('one_0_two_1_three_0', 'one.0.two.1.three.0'),
+                    ('one_0_two_1_three_1', 'one.0.two.1.three.1'),
+                    ('one_0_two_1_three_2', 'one.0.two.1.three.2'),
+                ]
+            }
+            self.assertEqual(outline, expected)
+
+    def test_different_keys_per_row(self):
+        "Outline should contain the union of the keys."
+        with open('fixtures/different_keys_per_row.json') as json_file:
+            outline = make_outline(json_file, False, 'nodes')
+            expected = {
+                'collection': 'nodes',
+                'map': [
+                    ('this', 'this'),
+                    ('tags_0', 'tags.0'),
+                    ('tags_1', 'tags.1'),
+                    ('tags_2', 'tags.2'),
+                    ('that', 'that'),
+                    ('theother', 'theother'),
+                ]
+            }
+            self.assertEqual(outline, expected)
+
+    def test_line_delimited(self):
+        with open('fixtures/line_delimited.json') as json_file:
+            outline = make_outline(json_file, True, None)
+            expected = {
+                'map': [
+                    ('source_author', 'source.author'),
+                    ('message_original', 'message.original'),
+                    ('message_Revised', 'message.Revised'),
+                ]
+            }
+            self.assertEqual(outline, expected)
+

From 8ca5c6fb5d5db874f6ae2a7f9b749a203d4f8bf4 Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 20:32:38 +0530
Subject: [PATCH 5/7] Port gen_outline.py to py3; remove duplicate get_outline.py

---
 fixtures/data.outline.json                   | 17 ++++
 fixtures/different_keys_per_row.outline.json | 29 +++++++
 fixtures/outline.json                        | 20 +++--
 gen_outline.py                               | 11 ++-
 get_outline.py                               | 88 --------------------
 5 files changed, 68 insertions(+), 97 deletions(-)
 create mode 100644 fixtures/data.outline.json
 create mode 100644 fixtures/different_keys_per_row.outline.json
 mode change 100755 => 100644 gen_outline.py
 delete mode 100644 get_outline.py

diff --git a/fixtures/data.outline.json b/fixtures/data.outline.json
new file mode 100644
index 0000000..50ad405
--- /dev/null
+++ b/fixtures/data.outline.json
@@ -0,0 +1,17 @@
+{
+  "map": [
+    [
+      "message_original",
+      "message.original"
+    ],
+    [
+      "source_author",
+      "source.author"
+    ],
+    [
+      "message_Revised",
+      "message.Revised"
+    ]
+  ],
+  "collection": "nodes"
+}
\ No newline at end of file
diff --git a/fixtures/different_keys_per_row.outline.json b/fixtures/different_keys_per_row.outline.json
new file mode 100644
index 0000000..afcf8b8
--- /dev/null
+++ b/fixtures/different_keys_per_row.outline.json
@@ -0,0 +1,29 @@
+{
+  "collection": "nodes",
+  "map": [
+    [
+      "that",
+      "that"
+    ],
+    [
+      "tags_2",
+      "tags.2"
+    ],
+    [
+      "theother",
+      "theother"
+    ],
+    [
+      "this",
+      "this"
+    ],
+    [
+      "tags_0",
+      "tags.0"
+    ],
+    [
+      "tags_1",
+      "tags.1"
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/fixtures/outline.json b/fixtures/outline.json
index b8d09b8..9844e7f 100644
--- a/fixtures/outline.json
+++ b/fixtures/outline.json
@@ -1,7 +1,17 @@
 {
-    "map": [
-        ["author", "source.author"],
-        ["message", "message.original"]
+  "map": [
+    [
+      "source_author",
+      "source.author"
     ],
-    "collection": "nodes"
-}
+    [
+      "message_Revised",
+      "message.Revised"
+    ],
+    [
+      "message_original",
+      "message.original"
+    ]
+  ],
+  "collection": "nodes"
+}
\ No newline at end of file
diff --git a/gen_outline.py b/gen_outline.py
old mode 100755
new mode 100644
index 8ab20aa..3067d6b
--- a/gen_outline.py
+++ b/gen_outline.py
@@ -6,7 +6,7 @@
 def key_paths(d):
     def helper(path, x):
         if isinstance(x, dict):
-            for k, v in x.iteritems():
+            for k, v in x.items():
                 for ret in helper(path + [k], v):
                     yield ret
         elif isinstance(x, list):
@@ -39,7 +39,9 @@ def path_join(path, sep='.'):
 def key_map_to_list(key_map):
     # We convert to strings *after* sorting so that array indices come out
     # in the correct order.
-    return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
+    # return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
+    # to get keys in the order defined in json ,not doing any sort
+    return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]
 
 def make_outline(json_file, each_line, collection_key):
     if each_line:
@@ -77,9 +79,10 @@ def main():
     if outfile is None:
         fileName, fileExtension = os.path.splitext(args.json_file.name)
         outfile = fileName + '.outline.json'
-
+#not soring the json keys
     with open(outfile, 'w') as f:
-        json.dump(outline, f, indent=2, sort_keys=True)
+        json.dump(outline, f, indent=2, sort_keys=False)
 
 if __name__ == '__main__':
     main()
+
diff --git a/get_outline.py b/get_outline.py
deleted file mode 100644
index 3067d6b..0000000
--- a/get_outline.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-
-import json
-import os, os.path
-
-def key_paths(d):
-    def helper(path, x):
-        if isinstance(x, dict):
-            for k, v in x.items():
-                for ret in helper(path + [k], v):
-                    yield ret
-        elif isinstance(x, list):
-            for i, item in enumerate(x):
-                for ret in helper(path + [i], item):
-                    yield ret
-        else:
-            yield path
-    return helper([], d)
-
-def line_iter(f):
-    for line in f:
-        yield json.loads(line)
-
-def coll_iter(f, coll_key):
-    data = json.load(f)
-    for obj in data[coll_key]:
-        yield obj
-
-def gather_key_map(iterator):
-    key_map = {}
-    for d in iterator:
-        for path in key_paths(d):
-            key_map[tuple(path)] = True
-    return key_map
-
-def path_join(path, sep='.'):
-    return sep.join(str(k) for k in path)
-
-def key_map_to_list(key_map):
-    # We convert to strings *after* sorting so that array indices come out
-    # in the correct order.
-    # return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
-    # to get keys in the order defined in json ,not doing any sort
-    return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]
-
-def make_outline(json_file, each_line, collection_key):
-    if each_line:
-        iterator = line_iter(json_file)
-    else:
-        iterator = coll_iter(json_file, collection_key)
-
-    key_map = gather_key_map(iterator)
-    outline = {'map': key_map_to_list(key_map)}
-    if collection_key:
-        outline['collection'] = collection_key
-
-    return outline
-
-def init_parser():
-    import argparse
-    parser = argparse.ArgumentParser(description="Generate an outline file for json2csv.py")
-    parser.add_argument('json_file', type=argparse.FileType('r'),
-                        help="Path to JSON data file to analyze")
-    parser.add_argument('-o', '--output-file', type=str, default=None,
-                        help="Path to outline file to output")
-    group = parser.add_mutually_exclusive_group(required=True)
-    group.add_argument('-e', '--each-line', action="store_true", default=False,
-                       help="Process each line of JSON file separately")
-    group.add_argument('-c', '--collection', type=str, default=None,
-                       help="Key in JSON of array to process", metavar="KEY")
-
-    return parser
-
-def main():
-    parser = init_parser()
-    args = parser.parse_args()
-    outline = make_outline(args.json_file, args.each_line, args.collection)
-    outfile = args.output_file
-    if outfile is None:
-        fileName, fileExtension = os.path.splitext(args.json_file.name)
-        outfile = fileName + '.outline.json'
-#not soring the json keys
-    with open(outfile, 'w') as f:
-        json.dump(outline, f, indent=2, sort_keys=False)
-
-if __name__ == '__main__':
-    main()
-

From 35087d15dacd8c27bdc85b655b270ae4832168c5 Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sat, 26 Jun 2021 23:40:27 +0530
Subject: [PATCH 6/7] fixes and unittest

---
 fixtures/data.outline.json                   | 32 +++++------
 fixtures/different_keys_per_row.outline.json | 56 ++++++++++----------
 fixtures/line_delimited.outline.json         | 16 ++++++
 fixtures/nested_json_frt.json                | 46 ++++++++++++++++
 fixtures/nested_json_frt.outline.json        | 37 +++++++++++++
 fixtures/outline.json                        | 32 +++++------
 gen_outline.py                               |  9 ++--
 tests.py                                     | 11 ++--
 8 files changed, 168 insertions(+), 71 deletions(-)
 mode change 100644 => 100755 fixtures/data.outline.json
 mode change 100644 => 100755 fixtures/different_keys_per_row.outline.json
 create mode 100755 fixtures/line_delimited.outline.json
 create mode 100755 fixtures/nested_json_frt.json
 create mode 100755 fixtures/nested_json_frt.outline.json
 mode change 100644 => 100755 fixtures/outline.json
 mode change 100644 => 100755 gen_outline.py

diff --git a/fixtures/data.outline.json b/fixtures/data.outline.json
old mode 100644
new mode 100755
index 50ad405..a93ad96
--- a/fixtures/data.outline.json
+++ b/fixtures/data.outline.json
@@ -1,17 +1,17 @@
-{
-  "map": [
-    [
-      "message_original",
-      "message.original"
-    ],
-    [
-      "source_author",
-      "source.author"
-    ],
-    [
-      "message_Revised",
-      "message.Revised"
-    ]
-  ],
-  "collection": "nodes"
+{
+  "collection": "nodes",
+  "map": [
+    [
+      "message_Revised",
+      "message.Revised"
+    ],
+    [
+      "message_original",
+      "message.original"
+    ],
+    [
+      "source_author",
+      "source.author"
+    ]
+  ]
 }
\ No newline at end of file
diff --git a/fixtures/different_keys_per_row.outline.json b/fixtures/different_keys_per_row.outline.json
old mode 100644
new mode 100755
index afcf8b8..c502506
--- a/fixtures/different_keys_per_row.outline.json
+++ b/fixtures/different_keys_per_row.outline.json
@@ -1,29 +1,29 @@
-{
-  "collection": "nodes",
-  "map": [
-    [
-      "that",
-      "that"
-    ],
-    [
-      "tags_2",
-      "tags.2"
-    ],
-    [
-      "theother",
-      "theother"
-    ],
-    [
-      "this",
-      "this"
-    ],
-    [
-      "tags_0",
-      "tags.0"
-    ],
-    [
-      "tags_1",
-      "tags.1"
-    ]
-  ]
+{
+  "map": [
+    [
+      "this",
+      "this"
+    ],
+    [
+      "tags_0",
+      "tags.0"
+    ],
+    [
+      "tags_1",
+      "tags.1"
+    ],
+    [
+      "tags_2",
+      "tags.2"
+    ],
+    [
+      "that",
+      "that"
+    ],
+    [
+      "theother",
+      "theother"
+    ]
+  ],
+  "collection": "nodes"
 }
\ No newline at end of file
diff --git a/fixtures/line_delimited.outline.json b/fixtures/line_delimited.outline.json
new file mode 100755
index 0000000..447be50
--- /dev/null
+++ b/fixtures/line_delimited.outline.json
@@ -0,0 +1,16 @@
+{
+  "map": [
+    [
+      "source_author",
+      "source.author"
+    ],
+    [
+      "message_original",
+      "message.original"
+    ],
+    [
+      "message_Revised",
+      "message.Revised"
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/fixtures/nested_json_frt.json b/fixtures/nested_json_frt.json
new file mode 100755
index 0000000..d7001bd
--- /dev/null
+++ b/fixtures/nested_json_frt.json
@@ -0,0 +1,46 @@
+{
+    "fruit": [
+        {
+            "name": "Apple",
+            "binomial name": "Malus domestica",
+            "major_producers": [
+                "China",
+                "United States",
+                "Turkey"
+            ],
+            "nutrition": {
+                "carbohydrates": "13.81g",
+                "fat": "0.17g",
+                "protein": "0.26g"
+            }
+        },
+        {
+            "name": "Orange",
+            "binomial name": "Citrus x sinensis",
+            "major_producers": [
+                "Brazil",
+                "United States",
+                "India"
+            ],
+            "nutrition": {
+                "carbohydrates": "11.75g",
+                "fat": "0.12g",
+                "protein": "0.94g"
+            }
+        },
+        {
+            "name": "Mango",
+            "binomial name": "Mangifera indica",
+            "major_producers": [
+                "India",
+                "China",
+                "Thailand"
+            ],
+            "nutrition": {
+                "carbohydrates": "15g",
+                "fat": "0.38g",
+                "protein": "0.82g"
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/fixtures/nested_json_frt.outline.json b/fixtures/nested_json_frt.outline.json
new file mode 100755
index 0000000..0c2e5da
--- /dev/null
+++ b/fixtures/nested_json_frt.outline.json
@@ -0,0 +1,37 @@
+{
+  "map": [
+    [
+      "name",
+      "name"
+    ],
+    [
+      "binomial name",
+      "binomial name"
+    ],
+    [
+      "major_producers_0",
+      "major_producers.0"
+    ],
+    [
+      "major_producers_1",
+      "major_producers.1"
+    ],
+    [
+      "major_producers_2",
+      "major_producers.2"
+    ],
+    [
+      "nutrition_carbohydrates",
+      "nutrition.carbohydrates"
+    ],
+    [
+      "nutrition_fat",
+      "nutrition.fat"
+    ],
+    [
+      "nutrition_protein",
+      "nutrition.protein"
+    ]
+  ],
+  "collection": "fruit"
+}
\ No newline at end of file
diff --git a/fixtures/outline.json b/fixtures/outline.json
old mode 100644
new mode 100755
index 9844e7f..a93ad96
--- a/fixtures/outline.json
+++ b/fixtures/outline.json
@@ -1,17 +1,17 @@
-{
-  "map": [
-    [
-      "source_author",
-      "source.author"
-    ],
-    [
-      "message_Revised",
-      "message.Revised"
-    ],
-    [
-      "message_original",
-      "message.original"
-    ]
-  ],
-  "collection": "nodes"
+{
+  "collection": "nodes",
+  "map": [
+    [
+      "message_Revised",
+      "message.Revised"
+    ],
+    [
+      "message_original",
+      "message.original"
+    ],
+    [
+      "source_author",
+      "source.author"
+    ]
+  ]
 }
\ No newline at end of file
diff --git a/gen_outline.py b/gen_outline.py
old mode 100644
new mode 100755
index 3067d6b..51dfb23
--- a/gen_outline.py
+++ b/gen_outline.py
@@ -39,9 +39,9 @@ def path_join(path, sep='.'):
 def key_map_to_list(key_map):
     # We convert to strings *after* sorting so that array indices come out
     # in the correct order.
-    # return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
+    return [(path_join(k, '_'), path_join(k)) for k in sorted(key_map.keys())]
     # to get keys in the order defined in json ,not doing any sort
-    return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]
+    # return [(path_join(k, '_'), path_join(k)) for k in key_map.keys()]
 
 def make_outline(json_file, each_line, collection_key):
     if each_line:
@@ -79,10 +79,9 @@ def main():
     if outfile is None:
         fileName, fileExtension = os.path.splitext(args.json_file.name)
         outfile = fileName + '.outline.json'
-#not soring the json keys
+# Sort the keys so that the output file always has the same field order.
     with open(outfile, 'w') as f:
-        json.dump(outline, f, indent=2, sort_keys=False)
+        json.dump(outline, f, indent=2, sort_keys=True)
 
 if __name__ == '__main__':
     main()
-
diff --git a/tests.py b/tests.py
index 91b9e44..580a121 100644
--- a/tests.py
+++ b/tests.py
@@ -140,9 +140,9 @@ def test_basic(self):
             expected = {
                 'collection': 'nodes',
                 'map': [
-                    ('source_author', 'source.author'),
-                    ('message_original', 'message.original'),
                     ('message_Revised', 'message.Revised'),
+                    ('message_original', 'message.original'),
+                    ('source_author', 'source.author'),
                 ]
             }
             self.assertEqual(outline, expected)
@@ -170,12 +170,12 @@ def test_different_keys_per_row(self):
             expected = {
                 'collection': 'nodes',
                 'map': [
-                    ('this', 'this'),
                     ('tags_0', 'tags.0'),
                     ('tags_1', 'tags.1'),
                     ('tags_2', 'tags.2'),
                     ('that', 'that'),
                     ('theother', 'theother'),
+                    ('this', 'this'),
                 ]
             }
             self.assertEqual(outline, expected)
@@ -185,10 +185,9 @@ def test_line_delimited(self):
             outline = make_outline(json_file, True, None)
             expected = {
                 'map': [
-                    ('source_author', 'source.author'),
-                    ('message_original', 'message.original'),
                     ('message_Revised', 'message.Revised'),
+                    ('message_original', 'message.original'),
+                    ('source_author', 'source.author'),
                 ]
             }
             self.assertEqual(outline, expected)
-

From f6c0d26b0599bef92b44f1357762a35afbd53bb2 Mon Sep 17 00:00:00 2001
From: Brahmanand Singh <backbencherg@gmail.com>
Date: Sun, 27 Jun 2021 00:23:45 +0530
Subject: [PATCH 7/7] Add expected CSV output for issue #38

---
 fixtures/nested_json_frt.csv | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100755 fixtures/nested_json_frt.csv

diff --git a/fixtures/nested_json_frt.csv b/fixtures/nested_json_frt.csv
new file mode 100755
index 0000000..4719cf4
--- /dev/null
+++ b/fixtures/nested_json_frt.csv
@@ -0,0 +1,4 @@
+"name","binomial name","major_producers_0","major_producers_1","major_producers_2","nutrition_carbohydrates","nutrition_fat","nutrition_protein"
+"Apple","Malus domestica","China","United States","Turkey","13.81g","0.17g","0.26g"
+"Orange","Citrus x sinensis","Brazil","United States","India","11.75g","0.12g","0.94g"
+"Mango","Mangifera indica","India","China","Thailand","15g","0.38g","0.82g"