Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Separate MultiIndex names from levels
  • Loading branch information
topper-123 committed Oct 14, 2019
commit ad43b307d87b9c5787f015dda156d637e4427d66
12 changes: 12 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,18 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']


.. _whatsnew_0250.api_breaking.MultiIndex._names:


``MultiIndex.levels`` do not hold level names any longer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

A :class:`MultiIndex` previously stored the level names as attributes of each of its
:attr:`MultiIndex.levels`. From pandas 0.25, the names can only be accessed through
:attr:`MultiIndex.names` (which was also possible previously). This is done in order to
make :attr:`MultiIndex.levels` more similar to :attr:`CategoricalIndex.categories`.


.. _whatsnew_0250.api_breaking.multi_indexing:


Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7772,7 +7772,8 @@ def _count_level(self, level, axis=0, numeric_only=False):
if isinstance(level, str):
level = count_axis._get_level_number(level)

level_index = count_axis.levels[level]
level_name = count_axis._names[level]
level_index = count_axis.levels[level]._shallow_copy(name=level_name)
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)

Expand Down
15 changes: 8 additions & 7 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def __new__(
result._set_levels(levels, copy=copy, validate=False)
result._set_codes(codes, copy=copy, validate=False)

result._names = [None for _ in levels]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[None] * len(levels)

if names is not None:
# handles name validation
result._set_names(names)
Expand Down Expand Up @@ -1216,7 +1217,7 @@ def __len__(self):
return len(self.codes[0])

def _get_names(self):
return FrozenList(level.name for level in self.levels)
return FrozenList(self._names)

def _set_names(self, names, level=None, validate=True):
"""
Expand Down Expand Up @@ -1262,7 +1263,7 @@ def _set_names(self, names, level=None, validate=True):
level = [self._get_level_number(l) for l in level]

# set the name
for l, name in zip(level, names):
for lev, name in zip(level, names):
if name is not None:
# GH 20527
# All items in 'names' need to be hashable:
Expand All @@ -1272,7 +1273,7 @@ def _set_names(self, names, level=None, validate=True):
self.__class__.__name__
)
)
self.levels[l].rename(name, inplace=True)
self._names[lev] = name

names = property(
fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
Expand Down Expand Up @@ -1582,13 +1583,13 @@ def _get_level_values(self, level, unique=False):
values : ndarray
"""

values = self.levels[level]
lev = self.levels[level]
level_codes = self.codes[level]
name = self._names[level]
if unique:
level_codes = algos.unique(level_codes)
filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value)
values = values._shallow_copy(filled)
return values
filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
return lev._shallow_copy(filled, name=name)

def get_level_values(self, level):
"""
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,13 @@ def get_new_values(self):
def get_new_columns(self):
if self.value_columns is None:
if self.lift == 0:
return self.removed_level
lev = self.removed_level._shallow_copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why wouldn't you do
lev = self.removed_level._shallow_copy(name=self.removed_name) ?

Copy link
Contributor Author

@topper-123 topper-123 Jul 5, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_shallow_copy, rename and the other indirect ways of setting .name all call ._set_names, which does a lot of checks. Those checks are not needed in this internal functionality, as the name has already been validated.

Perhaps have a fastpath parameter in _set_names?

lev.name = self.removed_name
return lev

lev = self.removed_level
return lev.insert(0, lev._na_value)
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use .rename()

lev.name = self.removed_name
return lev

stride = len(self.removed_level) + self.lift
width = len(self.value_columns)
Expand Down Expand Up @@ -301,7 +304,9 @@ def get_new_index(self):
lev, lab = self.new_index_levels[0], result_codes[0]
if (lab == -1).any():
lev = lev.insert(len(lev), lev._na_value)
return lev.take(lab)
new_index = lev.take(lab)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use .rename()

new_index.name = self.new_index_names[0]
return new_index

return MultiIndex(
levels=self.new_index_levels,
Expand Down Expand Up @@ -661,7 +666,9 @@ def _convert_level_number(level_num, columns):
new_names = this.columns.names[:-1]
new_columns = MultiIndex.from_tuples(unique_groups, names=new_names)
else:
new_columns = unique_groups = this.columns.levels[0]
new_columns = this.columns.levels[0]._shallow_copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use name= here

new_columns.name = this.columns.names[0]
unique_groups = new_columns

# time to ravel the values
new_data = {}
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,10 @@ def build_table_schema(data, index=True, primary_key=None, version=True):

if index:
if data.index.nlevels > 1:
for level in data.index.levels:
fields.append(convert_pandas_type_to_json_field(level))
for level, name in zip(data.index.levels, data.index.names):
new_field = convert_pandas_type_to_json_field(level)
new_field["name"] = name
fields.append(new_field)
else:
fields.append(convert_pandas_type_to_json_field(data.index))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,7 +978,7 @@ def test_reset_index(self, float_frame):
):
values = lev.take(level_codes)
name = names[i]
tm.assert_index_equal(values, Index(deleveled[name]))
tm.assert_index_equal(values, Index(deleveled[name]), check_names=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this changed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lev.take(level_codes) doesn't provide a name any more, while a reset index does provide its Series with a name (as it should).

I've added a test assert values.name is None to make this more explicit.


stacked.index.names = [None, None]
deleveled2 = stacked.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/multi/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_astype(idx):
actual = idx.astype("O")
assert_copy(actual.levels, expected.levels)
assert_copy(actual.codes, expected.codes)
assert [level.name for level in actual.levels] == list(expected.names)
assert actual.names == list(expected.names)

with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
idx.astype(np.dtype(int))
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/indexes/multi/test_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_constructor_single_level():
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
)
assert isinstance(result, MultiIndex)
expected = Index(["foo", "bar", "baz", "qux"], name="first")
expected = Index(["foo", "bar", "baz", "qux"])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["first"]

Expand Down Expand Up @@ -292,8 +292,9 @@ def test_from_arrays_empty():
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
assert isinstance(result, MultiIndex)
expected = Index([], name="A")
expected = Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -439,8 +440,9 @@ def test_from_product_empty_zero_levels():

def test_from_product_empty_one_level():
result = MultiIndex.from_product([[]], names=["A"])
expected = pd.Index([], name="A")
expected = pd.Index([])
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]


@pytest.mark.parametrize(
Expand Down
25 changes: 11 additions & 14 deletions pandas/tests/indexes/multi/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,25 @@ def test_index_name_retained():


def test_changing_names(idx):

# names should be applied to levels
level_names = [level.name for level in idx.levels]
check_level_names(idx, idx.names)
assert [level.name for level in idx.levels] == [None, None]

view = idx.view()
copy = idx.copy()
shallow_copy = idx._shallow_copy()

# changing names should change level names on object
# changing names should not change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, new_names)
check_level_names(idx, [None, None])

# but not on copies
check_level_names(view, level_names)
check_level_names(copy, level_names)
check_level_names(shallow_copy, level_names)
# and not on copies
check_level_names(view, [None, None])
check_level_names(copy, [None, None])
check_level_names(shallow_copy, [None, None])

# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, new_names)
check_level_names(idx, [None, None])


def test_take_preserve_name(idx):
Expand Down Expand Up @@ -84,7 +81,8 @@ def test_names(idx, index_names):
# names are assigned in setup
names = index_names
level_names = [level.name for level in idx.levels]
assert names == level_names
assert names == ["first", "second"]
assert level_names == [None, None]

# setting bad names on existing
index = idx
Expand All @@ -111,9 +109,8 @@ def test_names(idx, index_names):

# names are assigned
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
assert level_names == [None, None]


def test_duplicate_level_names_access_raises(idx):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ def check_level_names(index, names):
def test_reindex(idx):
result, indexer = idx.reindex(list(idx[:4]))
assert isinstance(result, MultiIndex)
check_level_names(result, idx[:4].names)
check_level_names(result, [None, None])

result, indexer = idx.reindex(list(idx))
assert isinstance(result, MultiIndex)
assert indexer is None
check_level_names(result, idx.names)
check_level_names(result, [None, None])


def test_reindex_level(idx):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def test_insert(idx):
# key not contained in all levels
new_index = idx.insert(0, ("abc", "three"))

exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
exp0 = Index(list(idx.levels[0]) + ["abc"])
tm.assert_index_equal(new_index.levels[0], exp0)
assert new_index.names == ["first", "second"]

exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
exp1 = Index(list(idx.levels[1]) + ["three"])
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ("abc", "three")

Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,8 +1219,10 @@ def test_concat_keys_specific_levels(self):
names=["group_key"],
)

tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
assert result.columns.names[0] == "group_key"
tm.assert_index_equal(result.columns.levels[0], Index(level))
tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))

assert result.columns.names == ["group_key", None]

def test_concat_dataframe_keys_bug(self, sort):
t1 = DataFrame(
Expand Down Expand Up @@ -1409,10 +1411,8 @@ def test_concat_keys_and_levels(self):
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
names=["first", "second"],
)
assert result.index.names == ("first", "second") + (None,)
tm.assert_index_equal(
result.index.levels[0], Index(["baz", "foo"], name="first")
)
assert result.index.names == ("first", "second", None)
tm.assert_index_equal(result.index.levels[0], Index(["baz", "foo"]))

def test_concat_keys_levels_no_overlap(self):
# GH #1406
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def test_reshaping_multi_index_categorical(self):
df.index.names = ["major", "minor"]
df["str"] = "foo"

dti = df.index.levels[0]
dti = df.index.levels[0].set_names(["major"])

df["category"] = df["str"].astype("category")
result = df["category"].unstack()
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def test_count_level_corner(self):
df = self.frame[:0]
result = df.count(level=0)
expected = (
DataFrame(index=s.index.levels[0], columns=df.columns)
DataFrame(index=s.index.levels[0].set_names(["first"]), columns=df.columns)
.fillna(0)
.astype(np.int64)
)
Expand Down Expand Up @@ -976,13 +976,11 @@ def test_count(self):

result = series.count(level="b")
expect = self.series.count(level=1)
tm.assert_series_equal(result, expect, check_names=False)
assert result.index.name == "b"
tm.assert_series_equal(result, expect)

result = series.count(level="a")
expect = self.series.count(level=0)
tm.assert_series_equal(result, expect, check_names=False)
assert result.index.name == "a"
tm.assert_series_equal(result, expect)

msg = "Level x not found"
with pytest.raises(KeyError, match=msg):
Expand Down Expand Up @@ -1036,10 +1034,10 @@ def aggf(x):
# for good measure, groupby detail
level_index = frame._get_axis(axis).levels[level]

tm.assert_index_equal(leftside._get_axis(axis), level_index)
tm.assert_index_equal(rightside._get_axis(axis), level_index)
tm.assert_index_equal(leftside._get_axis(axis), level_index, check_names=False)
tm.assert_index_equal(rightside._get_axis(axis), level_index, check_names=False)

tm.assert_frame_equal(leftside, rightside)
tm.assert_frame_equal(leftside, rightside, check_names=False)

def test_stat_op_corner(self):
obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)]))
Expand Down Expand Up @@ -1639,12 +1637,12 @@ def test_constructor_with_tz(self):
)

result = MultiIndex.from_arrays([index, columns])
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
tm.assert_index_equal(result.levels[0], index, check_names=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I now find these tests very confusing, because we lose the names on the levels themselves. (I know that's the point of this PR.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a .set_names(index.name) (for example) and remove the check_names arg (so it's the default of True).

Copy link
Contributor Author

@topper-123 topper-123 Jul 6, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made changes so we avoid check_names=False (so it is implicitly True).

tm.assert_index_equal(result.levels[1], columns, check_names=False)

result = MultiIndex.from_arrays([Series(index), Series(columns)])
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
tm.assert_index_equal(result.levels[0], index, check_names=False)
tm.assert_index_equal(result.levels[1], columns, check_names=False)

def test_set_index_datetime(self):
# GH 3950
Expand Down Expand Up @@ -1672,12 +1670,14 @@ def test_set_index_datetime(self):
expected = expected.tz_localize("UTC").tz_convert("US/Pacific")

df = df.set_index("label", append=True)
tm.assert_index_equal(df.index.levels[0], expected)
tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label"))
tm.assert_index_equal(df.index.levels[0], expected, check_names=False)
tm.assert_index_equal(df.index.levels[1], Index(["a", "b"]))
assert df.index.names == ["datetime", "label"]

df = df.swaplevel(0, 1)
tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label"))
tm.assert_index_equal(df.index.levels[1], expected)
tm.assert_index_equal(df.index.levels[0], Index(["a", "b"]))
tm.assert_index_equal(df.index.levels[1], expected, check_names=False)
assert df.index.names == ["label", "datetime"]

df = DataFrame(np.random.random(6))
idx1 = pd.DatetimeIndex(
Expand Down