Differential D8684 Diff 40955 examples/docs_snippets/docs_snippets/legacy/dagster_pandas_guide/summary_stats_pipeline.py
Changeset View
Standalone View
examples/docs_snippets/docs_snippets/legacy/dagster_pandas_guide/summary_stats_pipeline.py
Show All 12 Lines | return { | ||||
"max_end_time": max(dataframe["end_time"]).strftime("%Y-%m-%d"), | "max_end_time": max(dataframe["end_time"]).strftime("%Y-%m-%d"), | ||||
"num_unique_bikes": str(dataframe["bike_id"].nunique()), | "num_unique_bikes": str(dataframe["bike_id"].nunique()), | ||||
"n_rows": len(dataframe), | "n_rows": len(dataframe), | ||||
"columns": str(dataframe.columns), | "columns": str(dataframe.columns), | ||||
} | } | ||||
# Dagster dataframe type whose event metadata is produced by
# compute_trip_dataframe_summary_statistics.
SummaryStatsTripDataFrame = create_dagster_pandas_dataframe_type(
    event_metadata_fn=compute_trip_dataframe_summary_statistics,
    name="SummaryStatsTripDataFrame",
)
# end_summary | # end_summary | ||||
@solid(
    output_defs=[
        OutputDefinition(
            dagster_type=SummaryStatsTripDataFrame,
            name="summary_stats_trip_dataframe",
        )
    ]
)
def load_summary_stats_trip_dataframe() -> DataFrame:
    # Loads ebike_trips.csv and returns it as a DataFrame typed as
    # SummaryStatsTripDataFrame on the solid's single output.

    def parse_timestamp(raw):
        # Timestamps in the CSV carry fractional seconds.
        return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S.%f")

    # NOTE(review): script_relative_path presumably resolves against this
    # file's directory -- keep the call inside this function body.
    csv_path = script_relative_path("./ebike_trips.csv")
    return read_csv(
        csv_path,
        parse_dates=["start_time", "end_time"],
        date_parser=parse_timestamp,
    )
@pipeline
def summary_stats_pipeline():
    # The pipeline body is a single invocation of the CSV-loading solid.
    load_summary_stats_trip_dataframe()