compare pyarrow vs pandas read_parquet
This commit is contained in:
@@ -13,25 +13,49 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "4b3733cd-3c59-4cb4-a343-4cf75ea36ceb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'time' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpd\u001b[39;00m\n\u001b[32m 4\u001b[39m URL_DATA = \u001b[33m'\u001b[39m\u001b[33mhttps://storage.data.gov.my/transportation/cars_2025.parquet\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m start_time = \u001b[43mtime\u001b[49m.perf_counter()\n\u001b[32m 7\u001b[39m df = pd.read_parquet(URL_DATA)\n\u001b[32m 8\u001b[39m end_time = time.perf_counter()\n",
|
||||
"\u001b[31mNameError\u001b[39m: name 'time' is not defined"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.53\n",
|
||||
" date_reg type maker model colour fuel \\\n",
|
||||
"0 2025-01-01 motokar BYD Seal white electric \n",
|
||||
"1 2025-01-01 window_van Cam Placer-X yellow greendiesel \n",
|
||||
"2 2025-01-01 jip Chery Jaecoo J7 green petrol \n",
|
||||
"3 2025-01-01 jip Chery Jaecoo J7 silver petrol \n",
|
||||
"4 2025-01-01 jip Chery Tiggo grey petrol \n",
|
||||
"... ... ... ... ... ... ... \n",
|
||||
"396824 2025-06-30 window_van Zeekr 009 white electric \n",
|
||||
"396825 2025-06-30 jip Zeekr X beige electric \n",
|
||||
"396826 2025-06-30 jip Zeekr X grey electric \n",
|
||||
"396827 2025-06-30 jip Zeekr X grey electric \n",
|
||||
"396828 2025-06-30 jip Zeekr X green electric \n",
|
||||
"\n",
|
||||
" state \n",
|
||||
"0 Rakan Niaga \n",
|
||||
"1 Johor \n",
|
||||
"2 Rakan Niaga \n",
|
||||
"3 Rakan Niaga \n",
|
||||
"4 Rakan Niaga \n",
|
||||
"... ... \n",
|
||||
"396824 W.P. Kuala Lumpur \n",
|
||||
"396825 W.P. Kuala Lumpur \n",
|
||||
"396826 W.P. Kuala Lumpur \n",
|
||||
"396827 W.P. Kuala Lumpur \n",
|
||||
"396828 W.P. Kuala Lumpur \n",
|
||||
"\n",
|
||||
"[396829 rows x 7 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If not already installed, do: pip install pandas fastparquet\n",
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"URL_DATA = 'https://storage.data.gov.my/transportation/cars_2025.parquet'\n",
|
||||
"\n",
|
||||
@@ -48,7 +72,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "72c39de4-4b98-4dca-8152-3a51f84d86f5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -56,6 +80,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.18\n",
|
||||
" date_reg type maker model colour fuel \\\n",
|
||||
"0 2025-01-01 motokar BYD Seal white electric \n",
|
||||
"1 2025-01-01 window_van Cam Placer-X yellow greendiesel \n",
|
||||
@@ -88,11 +113,17 @@
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"URL_DATA = 'https://storage.data.gov.my/transportation/cars_2025.parquet'\n",
|
||||
"\n",
|
||||
"# Time how long it takes to read the Parquet file at URL_DATA using the pyarrow engine\n",
|
||||
"start_time = time.perf_counter()\n",
|
||||
"df = pd.read_parquet(URL_DATA, engine='pyarrow')\n",
|
||||
"end_time = time.perf_counter()\n",
|
||||
"total_time = end_time - start_time\n",
|
||||
"print(f'{total_time:0.2f}')\n",
|
||||
"\n",
|
||||
"if 'date' in df.columns: df['date'] = pd.to_datetime(df['date'])\n",
|
||||
"\n",
|
||||
"# print(df.head())\n",
|
||||
|
Reference in New Issue
Block a user