611351f696
- Update to 0.12.0 - Two BUILD_DEPENDS are only RUN_DEPENDS, move them - Patch some files out of MANIFEST.in - Cherry pick upstream issue/commit [1] - Define regression-test target and TEST_DEPENDS - Document USES=display in support of unit tests [1] https://github.com/pydata/pandas/issues/4353 PR: ports/180889 Submitted by: John W. O'Brien <john@saltant.com> (maintainer)
162 lines
7.4 KiB
Text
162 lines
7.4 KiB
Text
This patch is adapted from the following upstream commit.
|
|
|
|
[PATCH] TST/BUG/CLN: make stata IO tests use temporary files for writing
|
|
c98e09951f74bf3445f8144363afaa769be69940
|
|
|
|
--- pandas/io/tests/test_stata.py.orig
|
|
+++ pandas/io/tests/test_stata.py
|
|
@@ -10,9 +10,8 @@ import numpy as np
|
|
|
|
from pandas.core.frame import DataFrame, Series
|
|
from pandas.io.parsers import read_csv
|
|
-from pandas.io.stata import read_stata, StataReader, StataWriter
|
|
+from pandas.io.stata import read_stata, StataReader
|
|
import pandas.util.testing as tm
|
|
-from pandas.util.testing import ensure_clean
|
|
from pandas.util.misc import is_little_endian
|
|
|
|
|
|
@@ -27,15 +26,12 @@ class StataTests(unittest.TestCase):
|
|
self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
|
|
self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
|
|
self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
|
|
- self.dta5 = os.path.join(self.dirpath, 'stata5.dta')
|
|
- self.dta6 = os.path.join(self.dirpath, 'stata6.dta')
|
|
self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
|
|
self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
|
|
self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
|
|
self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
|
|
self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
|
|
self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
|
|
- self.dta10 = os.path.join(self.dirpath, 'stata10.dta')
|
|
|
|
def read_dta(self, file):
|
|
return read_stata(file, convert_dates=True)
|
|
@@ -46,9 +42,11 @@ class StataTests(unittest.TestCase):
|
|
def test_read_dta1(self):
|
|
reader = StataReader(self.dta1)
|
|
parsed = reader.data()
|
|
- # Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
|
|
+ # Pandas uses np.nan as missing value.
|
|
+ # Thus, all columns will be of type float, regardless of their name.
|
|
expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
|
|
- columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
|
|
+ columns=['float_miss', 'double_miss', 'byte_miss',
|
|
+ 'int_miss', 'long_miss'])
|
|
|
|
for i, col in enumerate(parsed.columns):
|
|
np.testing.assert_almost_equal(
|
|
@@ -90,7 +88,9 @@ class StataTests(unittest.TestCase):
|
|
np.datetime64('NaT')
|
|
)
|
|
],
|
|
- columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date', 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date']
|
|
+ columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
|
|
+ 'monthly_date', 'quarterly_date', 'half_yearly_date',
|
|
+ 'yearly_date']
|
|
)
|
|
|
|
with warnings.catch_warnings(record=True) as w:
|
|
@@ -125,34 +125,40 @@ class StataTests(unittest.TestCase):
|
|
["nine", "two", 9, np.nan, "nine"],
|
|
["ten", "one", "ten", np.nan, "ten"]
|
|
],
|
|
- columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled'])
|
|
+ columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled',
|
|
+ 'labeled_with_missings', 'float_labelled'])
|
|
|
|
tm.assert_frame_equal(parsed, expected)
|
|
|
|
- def test_write_dta5(self):
|
|
+ def test_read_write_dta5(self):
|
|
if not is_little_endian():
|
|
- raise nose.SkipTest("known failure of test_write_dta5 on non-little endian")
|
|
+ raise nose.SkipTest("known failure of test_write_dta5 on "
|
|
+ "non-little endian")
|
|
|
|
original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
|
|
- columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
|
|
+ columns=['float_miss', 'double_miss', 'byte_miss',
|
|
+ 'int_miss', 'long_miss'])
|
|
original.index.name = 'index'
|
|
|
|
- with ensure_clean(self.dta5) as path:
|
|
+ with tm.ensure_clean() as path:
|
|
original.to_stata(path, None, False)
|
|
written_and_read_again = self.read_dta(path)
|
|
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
|
|
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
|
|
+ original)
|
|
|
|
def test_write_dta6(self):
|
|
if not is_little_endian():
|
|
- raise nose.SkipTest("known failure of test_write_dta6 on non-little endian")
|
|
+ raise nose.SkipTest("known failure of test_write_dta6 on "
|
|
+ "non-little endian")
|
|
|
|
original = self.read_csv(self.csv3)
|
|
original.index.name = 'index'
|
|
|
|
- with ensure_clean(self.dta6) as path:
|
|
+ with tm.ensure_clean() as path:
|
|
original.to_stata(path, None, False)
|
|
written_and_read_again = self.read_dta(path)
|
|
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
|
|
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
|
|
+ original)
|
|
|
|
@nose.tools.nottest
|
|
def test_read_dta7(self):
|
|
@@ -190,29 +196,30 @@ class StataTests(unittest.TestCase):
|
|
decimal=3
|
|
)
|
|
|
|
- def test_read_dta10(self):
|
|
+ def test_read_write_dta10(self):
|
|
if not is_little_endian():
|
|
- raise nose.SkipTest("known failure of test_write_dta10 on non-little endian")
|
|
+ raise nose.SkipTest("known failure of test_write_dta10 on "
|
|
+ "non-little endian")
|
|
|
|
- original = DataFrame(
|
|
- data=
|
|
- [
|
|
- ["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
|
|
- ],
|
|
- columns=['string', 'object', 'integer', 'float', 'datetime'])
|
|
+ original = DataFrame(data=[["string", "object", 1, 1.1,
|
|
+ np.datetime64('2003-12-25')]],
|
|
+ columns=['string', 'object', 'integer', 'float',
|
|
+ 'datetime'])
|
|
original["object"] = Series(original["object"], dtype=object)
|
|
original.index.name = 'index'
|
|
|
|
- with ensure_clean(self.dta10) as path:
|
|
+ with tm.ensure_clean() as path:
|
|
original.to_stata(path, {'datetime': 'tc'}, False)
|
|
written_and_read_again = self.read_dta(path)
|
|
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
|
|
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
|
|
+ original)
|
|
|
|
def test_stata_doc_examples(self):
|
|
- with ensure_clean(self.dta5) as path:
|
|
+ with tm.ensure_clean() as path:
|
|
df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
|
|
df.to_stata(path)
|
|
|
|
+
|
|
if __name__ == '__main__':
|
|
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
|
|
exit=False)
|
|
--- pandas/util/testing.py.orig
|
|
+++ pandas/util/testing.py
|
|
@@ -86,7 +86,7 @@ def set_trace():
|
|
#------------------------------------------------------------------------------
|
|
# contextmanager to ensure the file cleanup
|
|
@contextmanager
|
|
-def ensure_clean(filename = None):
|
|
+def ensure_clean(filename=None):
|
|
# if we are not passed a filename, generate a temporary
|
|
if filename is None:
|
|
filename = tempfile.mkstemp()[1]
|