Skip to content
Closed
Next Next commit
init
  • Loading branch information
LuciferYang committed Nov 3, 2024
commit e0f3b70002a99ba99d3616ff4f2312c428290110
98 changes: 98 additions & 0 deletions dev/stream-state-gen-protos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort on the first failing command (-e) and echo each command as it runs (-x).
set -ex

#######################################
# Rejects any command-line arguments; this script accepts none.
# Arguments: the arguments the script was invoked with ("$@")
# Outputs:   a usage message on stderr when at least one argument is present
# Returns:   0 when called without arguments; otherwise exits with status 1
#######################################
require_no_arguments() {
  if [[ $# -gt 0 ]]; then
    # Diagnostics belong on stderr so they never mix with regular output.
    echo "Illegal number of parameters." >&2
    echo "Usage: ./dev/stream-state-gen-protos.sh" >&2
    # Exit statuses are limited to 0-255; "exit -1" is out of range and only
    # worked by wrapping around, so fail with a plain 1 instead.
    exit 1
  fi
}

require_no_arguments "$@"


# Resolve the repository root as the parent of the directory that contains this
# script, then work from there. "$0" and the dirname result are quoted so a
# checkout path containing spaces survives, and "&&" stops pwd from reporting
# an unrelated directory when the cd fails.
SPARK_HOME="$(cd "$(dirname "$0")"/.. && pwd)"
cd "$SPARK_HOME"


# Destination for the generated Python sources. This script rejects every
# command-line argument before reaching this point, so the former "$# -eq 1"
# branch that wiped and re-created a caller-supplied directory could never run;
# it has been removed rather than left behind as dead code.
OUTPUT_PATH="${SPARK_HOME}/python/pyspark/sql/streaming/proto"

# Enter the module directory; the later steps address "gen" relative to it.
pushd "sql/core/src/main"

# Apache license header for the generated Python files. The delimiter is quoted
# ('EOF') so the text is taken literally, and the closing EOF stands on a line
# of its own: a here-document only ends at a line consisting of exactly the
# delimiter, so the previous "EOF)" terminator relied on parser leniency.
LICENSE=$(cat <<'EOF'
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
EOF
)
# Store the header at the fixed path this script has always written it to.
printf '%s\n' "$LICENSE" > /tmp/tmp_licence


# Remove generated files left over from a previous run.
rm -rf gen

# Produce a fresh set of files with buf, keeping its debug and verbose flags on.
buf generate --debug -vvv

# We need to edit the generated Python files to account for the actual package
# location (pyspark.sql.streaming.proto) and not the one generated by proto
# (spark.streaming), and to prepend the Apache license header to each of them.

#######################################
# Rewrites one generated Python file in place.
# Globals:   LICENSE (read) - header text defined earlier in this script; it is
#            used directly so no file in /tmp has to be trusted
# Arguments: $1 - path of the generated file
# Outputs:   none; the file at $1 is replaced by the rewritten version
#######################################
fix_generated_python_file() {
  local f=$1

  # Fix the package references. The dots of "spark.streaming" are escaped so
  # they match a literal "." instead of any character.
  if [[ $f == *_pb2.py ]]; then
    # First expression: imports. Second: module name in the serialized
    # descriptor.
    sed \
      -e 's/from spark\.streaming import/from pyspark.sql.streaming.proto import/g' \
      -e "s/DESCRIPTOR, 'spark\.streaming/DESCRIPTOR, 'pyspark.sql.streaming.proto/g" \
      -- "$f" > "$f.tmp"
    mv -- "$f.tmp" "$f"
  elif [[ $f == *.pyi ]]; then
    # Type stubs: fix imports and qualified names, and drop the
    # @typing_extensions.final decorator lines.
    sed \
      -e 's/import spark\.streaming\./import pyspark.sql.streaming.proto./g' \
      -e 's/spark\.streaming\./pyspark.sql.streaming.proto./g' \
      -e '/ *@typing_extensions\.final/d' \
      -- "$f" > "$f.tmp"
    mv -- "$f.tmp" "$f"
  fi

  # Prepend the Apache licence header to the file.
  cp -- "$f" "$f.bak"
  { printf '%s\n' "$LICENSE"; cat -- "$f.bak"; } > "$f"
  rm -- "$f.bak"
}

# Collect the paths before editing anything: the rewrite creates temporary
# ".tmp" and ".bak" siblings that would match the same "*.py*" pattern if find
# were still running. NUL-delimited output keeps every path in one piece.
generated_files=()
while IFS= read -r -d '' f; do
  generated_files+=("$f")
done < <(find gen/proto/python -name "*.py*" -print0)

for f in "${generated_files[@]}"; do
  fix_generated_python_file "$f"
done

# Format the generated sources with black, using the configuration file under
# dev/. "$SPARK_HOME" is quoted so a checkout path containing spaces reaches
# black as a single argument.
black --config "$SPARK_HOME/dev/pyproject.toml" gen/proto/python

# Last step: copy the result files to the destination module. Paths are read
# NUL-delimited and every expansion is quoted, so neither a file name nor the
# destination is split on whitespace or expanded as a glob.
while IFS= read -r -d '' f; do
  cp -- "$f" "$OUTPUT_PATH"
done < <(find gen/proto/python -name "*.py*" -print0)

# Clean up everything.
rm -Rf gen
1 change: 1 addition & 0 deletions dev/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,6 @@ exclude =
*python/pyspark/worker.pyi,
*python/pyspark/java_gateway.pyi,
*python/pyspark/sql/connect/proto/*,
*python/pyspark/sql/streaming/proto/*,
*/venv/*
max-line-length = 100
Loading