From 2cafb82d0ee4d45ce9135872258dccb8f74d14a5 Mon Sep 17 00:00:00 2001
From: jreback
Date: Sun, 27 Apr 2014 11:27:41 -0400
Subject: [PATCH] PERF: improved performance of compatible pickles (GH6899)

---
 doc/source/release.rst |  1 +
 doc/source/v0.14.0.txt |  2 ++
 pandas/io/pickle.py    | 18 +++++++++++++++---
 vb_suite/packers.py    | 31 ++++++++++++++++++++-----------
 4 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index fce5f2f93e68b..d100541ecbf92 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -292,6 +292,7 @@ Improvements to existing features
   specified (:issue:`6607`)
 - ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. (:issue:`5945`)
 - ``pivot_table`` can now accept ``Grouper`` by ``index`` and ``columns`` keywords (:issue:`6913`)
+- Improved performance of compatible pickles (:issue:`6899`)
 
 .. _release.bug_fixes-0.14.0:
 
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index acacbd1c0b43c..43096b133f26e 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -522,6 +522,8 @@ Performance
   (e.g. MonthEnd,BusinessMonthEnd), (:issue:`6479`)
 - Improve performance of ``CustomBusinessDay`` (:issue:`6584`)
 - improve performance of slice indexing on Series with string keys (:issue:`6341`, :issue:`6372`)
+- Performance improvements in timedelta conversions for integer dtypes (:issue:`6754`)
+- Improved performance of compatible pickles (:issue:`6899`)
 
 Experimental
 ~~~~~~~~~~~~
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 915c1e9ae1574..e80bfec9c8dba 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -34,16 +34,28 @@ def read_pickle(path):
     """
 
     def try_read(path, encoding=None):
+        # try with cPickle
         # try with current pickle, if we have a Type Error then
         # try with the compat pickle to handle subclass changes
         # pass encoding only if its not None as py2 doesn't handle
         # the param
+
+        # cpickle
+        # GH 6899
         try:
             with open(path, 'rb') as fh:
-                return pc.load(fh, encoding=encoding, compat=False)
+                return pkl.load(fh)
         except:
-            with open(path, 'rb') as fh:
-                return pc.load(fh, encoding=encoding, compat=True)
+
+            # reg/patched pickle
+            try:
+                with open(path, 'rb') as fh:
+                    return pc.load(fh, encoding=encoding, compat=False)
+
+            # compat pickle
+            except:
+                with open(path, 'rb') as fh:
+                    return pc.load(fh, encoding=encoding, compat=True)
 
     try:
         return try_read(path)
diff --git a/vb_suite/packers.py b/vb_suite/packers.py
index f2eac0e28cd44..ca0193e9b2c10 100644
--- a/vb_suite/packers.py
+++ b/vb_suite/packers.py
@@ -7,6 +7,7 @@
 import os
 import pandas as pd
 from pandas.core import common as com
+from random import randrange
 
 f = '__test__.msg'
 def remove(f):
@@ -15,10 +16,18 @@ def remove(f):
     except:
         pass
 
-index = date_range('20000101',periods=50000,freq='H')
-df = DataFrame({'float1' : randn(50000),
-                'float2' : randn(50000)},
+N=100000
+C=5
+index = date_range('20000101',periods=N,freq='H')
+df = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in range(C) ]),
                index=index)
+
+N=100000
+C=5
+index = date_range('20000101',periods=N,freq='H')
+df2 = DataFrame(dict([ ("float{0}".format(i),randn(N)) for i in range(C) ]),
+               index=index)
+df2['object'] = ['%08x'%randrange(16**8) for _ in range(N)]
 remove(f)
 """
 
 #----------------------------------------------------------------------
 # msgpack
 
 setup = common_setup + """
-df.to_msgpack(f)
+df2.to_msgpack(f)
 """
 
 packers_read_pack = Benchmark("pd.read_msgpack(f)", setup, start_date=start_date)
 
 setup = common_setup + """
 """
-packers_write_pack = Benchmark("df.to_msgpack(f)", setup, cleanup="remove(f)", start_date=start_date)
+packers_write_pack = Benchmark("df2.to_msgpack(f)", setup, cleanup="remove(f)", start_date=start_date)
 
 #----------------------------------------------------------------------
 # pickle
 
 setup = common_setup + """
-df.to_pickle(f)
+df2.to_pickle(f)
 """
 
 packers_read_pickle = Benchmark("pd.read_pickle(f)", setup, start_date=start_date)
 
 setup = common_setup + """
 """
-packers_write_pickle = Benchmark("df.to_pickle(f)", setup, cleanup="remove(f)", start_date=start_date)
+packers_write_pickle = Benchmark("df2.to_pickle(f)", setup, cleanup="remove(f)", start_date=start_date)
 
 #----------------------------------------------------------------------
 # csv
 
 #----------------------------------------------------------------------
 # hdf store
 
 setup = common_setup + """
-df.to_hdf(f,'df')
+df2.to_hdf(f,'df')
 """
 
 packers_read_hdf_store = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date)
 
 setup = common_setup + """
 """
-packers_write_hdf_store = Benchmark("df.to_hdf(f,'df')", setup, cleanup="remove(f)", start_date=start_date)
+packers_write_hdf_store = Benchmark("df2.to_hdf(f,'df')", setup, cleanup="remove(f)", start_date=start_date)
 
 #----------------------------------------------------------------------
 # hdf table
 
 setup = common_setup + """
-df.to_hdf(f,'df',table=True)
+df2.to_hdf(f,'df',table=True)
 """
 
 packers_read_hdf_table = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date)
 
 setup = common_setup + """
 """
-packers_write_hdf_table = Benchmark("df.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date)
+packers_write_hdf_table = Benchmark("df2.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date)
 
 #----------------------------------------------------------------------
 # json
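
Note on the change in pandas/io/pickle.py: ``read_pickle`` now tries the fast standard pickle loader first and only falls back to the slower compatibility loaders when that attempt raises. Below is a minimal sketch of that tiered-fallback pattern; ``compat_load`` is a hypothetical stand-in for pandas' internal ``pickle_compat.load`` (imported as ``pc`` in the patch), wired to the standard library only so the sketch runs on its own. It is not pandas API and not the exact code in the patch.

```python
import pickle


def compat_load(fh, encoding=None, compat=False):
    # Hypothetical stand-in for pandas' internal compat loader. Here it just
    # delegates to the standard library; the real loader additionally remaps
    # renamed/moved pandas classes so old pickles can still be resolved.
    if encoding is not None:
        return pickle.load(fh, encoding=encoding)
    return pickle.load(fh)


def read_pickle_sketch(path, encoding=None):
    # 1. Fast path: plain pickle handles files written by a compatible version.
    try:
        with open(path, 'rb') as fh:
            return pickle.load(fh)
    except Exception:
        pass

    # 2. Next, the patched loader without full class-name remapping.
    try:
        with open(path, 'rb') as fh:
            return compat_load(fh, encoding=encoding, compat=False)
    except Exception:
        pass

    # 3. Last resort: full compatibility mode for pickles from old versions.
    with open(path, 'rb') as fh:
        return compat_load(fh, encoding=encoding, compat=True)
```

Ordering the attempts this way means pickles written by the current version pay only the cost of the plain loader, while older pickles still load through the compatibility path, which is the performance point of GH6899.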
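
For reference, the benchmark data that the updated vb_suite/packers.py builds inside its vbench setup string can be reproduced standalone roughly as follows (numpy and pandas assumed installed). The frame shape, file name, and cleanup mirror the patch, but this script itself is illustrative and not part of it.

```python
# Rebuild the df2 used by the read/write benchmarks: C float columns plus
# one object (string) column of random 8-digit hex values.
import os
from random import randrange

import numpy as np
import pandas as pd

N = 100000
C = 5
index = pd.date_range('20000101', periods=N, freq='H')
df2 = pd.DataFrame({'float{0}'.format(i): np.random.randn(N) for i in range(C)},
                   index=index)
df2['object'] = ['%08x' % randrange(16 ** 8) for _ in range(N)]

f = '__test__.msg'
df2.to_pickle(f)              # roughly what packers_write_pickle times
result = pd.read_pickle(f)    # roughly what packers_read_pickle times
os.remove(f)                  # mirrors the remove(f) cleanup in the suite
```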