in python/sedona/geopandas/geodataframe.py [0:0]
def __getitem__(self, key: Any) -> Any:
    """
    Get item from GeoDataFrame by key.

    Parameters
    ----------
    key : str, list, slice, ndarray or Series
        - If key is a string, returns a Series for that column
        - If key is a list of strings, returns a new GeoDataFrame with selected columns
        - If key is a slice or array, returns rows in the GeoDataFrame

    Returns
    -------
    Any
        Series, GeoDataFrame, or other objects depending on the key type.

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> from sedona.geopandas import GeoDataFrame
    >>>
    >>> data = {'geometry': [Point(0, 0), Point(1, 1)], 'value': [1, 2]}
    >>> gdf = GeoDataFrame(data)
    >>> gdf['value']
    0    1
    1    2
    Name: value, dtype: int64
    """
    from sedona.geopandas import GeoSeries

    # Case 1: a single column name -> Series (GeoSeries when the column
    # holds geometry data according to the Spark schema).
    if isinstance(key, str):
        column_name = key
        if column_name not in self.columns:
            raise KeyError(f"Column '{column_name}' does not exist")

        # Materialize only the requested column on the driver.
        selected = self._internal.spark_frame.select(column_name).toPandas()

        # Look up the column's Spark schema field to decide how to wrap it.
        matches = [
            f for f in self._internal.spark_frame.schema.fields if f.name == key
        ]
        field = matches[0] if matches else None

        if field is not None and field.dataType.typeName() in (
            "geometrytype",
            "binary",
        ):
            # Geometry-typed (or binary, presumably WKB-encoded — TODO confirm)
            # columns are returned as GeoSeries.
            return GeoSeries(selected[column_name])

        # Non-geometry columns come back as a pandas-on-Spark Series.
        from pyspark.pandas import Series

        return Series(selected[column_name])

    # Case 2: a list of column names -> GeoDataFrame restricted to them.
    if isinstance(key, list) and all(isinstance(k, str) for k in key):
        missing_cols = [k for k in key if k not in self.columns]
        if missing_cols:
            raise KeyError(f"Columns {missing_cols} do not exist")
        projected = self._internal.spark_frame.select(*key).toPandas()
        return GeoDataFrame(projected)

    # Case 3: anything else (slice, array, boolean mask) -> row selection.
    # For now this pulls the whole frame to the driver via pandas;
    # this could be optimized later for better performance.
    full_frame = self._internal.spark_frame.toPandas()
    return GeoDataFrame(full_frame[key])