additional mdc migration
asl3 committed Jul 8, 2024
commit a2de740966a6a51fee850675e94810e33a407d9a
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -366,7 +366,7 @@ object SparkEnv extends Logging {
        name: String, endpointCreator: => RpcEndpoint):
      RpcEndpointRef = {
      if (isDriver) {
-       logInfo("Registering " + name)
+       logInfo(log"Registering ${MDC(LogKeys.NAME, name)}")
        rpcEnv.setupEndpoint(name, endpointCreator)
      } else {
        RpcUtils.makeDriverRef(name, conf, rpcEnv)
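The hunk above is the migration pattern this PR applies throughout: plain string concatenation is replaced by the log"..." interpolator, with each interpolated value wrapped in an MDC tagged by a LogKey so it is also emitted as a structured field. A minimal sketch of the pattern, assuming the framework's types live under org.apache.spark.internal as the diff's usage suggests (the RegistrationExample class is hypothetical, for illustration only):

import org.apache.spark.internal.{Logging, LogKeys, MDC}

// Hypothetical class, not part of this PR.
class RegistrationExample extends Logging {
  def register(name: String): Unit = {
    // Before: the value is flattened into the rendered message string.
    // logInfo("Registering " + name)

    // After: log"..." builds a message whose MDC-wrapped values are
    // emitted as structured fields keyed by LogKeys.NAME.
    logInfo(log"Registering ${MDC(LogKeys.NAME, name)}")
  }
}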
104 changes: 104 additions & 0 deletions dev/structured-logging-style.py
@@ -0,0 +1,104 @@
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import re
import sys
import glob

from sparktestsupport import SPARK_HOME


def main():
    # Captures the argument of logInfo/logWarning/logError calls.
    log_pattern = r'log(?:Info|Warning|Error)\((.*?)\)'
    # Matches arguments still built with "...".format(...), the s"..."
    # interpolator, or string concatenation, i.e. not yet migrated to
    # the structured log"..." interpolator with MDC-wrapped variables.
    inner_log_pattern = r'".*?"\.format\(.*\)|s?".*?(?:\$|\+(?!.*?[ |\t].*s?")).*'
    compiled_inner_log_pattern = re.compile(inner_log_pattern, flags=re.DOTALL)

    # Regex patterns for file paths to exclude from the Structured Logging style check
    excluded_file_patterns = [
        '[Tt]est',
        '/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala',
        '/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala'
    ]

    nonmigrated_files = {}

    scala_files = glob.glob(os.path.join(SPARK_HOME, '**', '*.scala'), recursive=True)

    for file in scala_files:
        if any(re.search(pattern, file) for pattern in excluded_file_patterns):
            continue

        with open(file, 'r') as f:
            content = f.read()

        for log_statement in re.finditer(log_pattern, content, re.DOTALL):
            if compiled_inner_log_pattern.fullmatch(log_statement.group(1)):
                # Convert the match offset into a 1-based line number and
                # 0-based column for error reporting.
                start_pos = log_statement.start()
                preceding_content = content[:start_pos]
                line_number = preceding_content.count('\n') + 1
                start_char = start_pos - preceding_content.rfind('\n') - 1
                nonmigrated_files.setdefault(file, []).append(
                    (line_number, start_char, log_statement.group(1)))

    if not nonmigrated_files:
        print("Structured logging style check passed.")
        sys.exit(0)

    for file_path, issues in nonmigrated_files.items():
        for line_number, start_char, _ in issues:
            print(f"[error] {file_path}:{line_number}:{start_char}")
    print('[error] Logging messages should use log"..." instead of s"..." and '
          'variables should be wrapped in `MDC`s. Refer to the Structured Logging '
          'Framework guidelines in `internal/Logging.scala`.')
    sys.exit(-1)


if __name__ == "__main__":
    main()
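For concreteness, here is the kind of Scala the check above is meant to catch and to allow. This is an illustrative, hypothetical snippet, not code from this PR; note that the script's pattern only covers logInfo, logWarning, and logError, so logDebug calls pass through unchecked:

import org.apache.spark.internal.{Logging, LogKeys, MDC}

// Hypothetical object showing what the checker would and would not flag.
object StyleCheckExamples extends Logging {
  def demo(name: String, blockId: String): Unit = {
    logInfo("Registering " + name)                        // flagged: concatenation
    logWarning(s"Block $blockId went missing")            // flagged: s-interpolator
    logInfo(log"Registering ${MDC(LogKeys.NAME, name)}")  // passes: log"..." with MDC
  }
}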
streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala
@@ -276,10 +276,11 @@ private[streaming] class BlockGenerator(
      }

      // At this point, state is StoppedGeneratingBlock. So drain the queue of to-be-pushed blocks.
-     logInfo("Pushing out the last " + blocksForPushing.size() + " blocks")
+     logInfo(log"Pushing out the last " +
+       log"${MDC(LogKeys.NUM_BLOCK_IDS, blocksForPushing.size())} blocks")
      while (!blocksForPushing.isEmpty) {
        val block = blocksForPushing.take()
-       logDebug(s"Pushing block $block")
+       logDebug(log"Pushing block ${MDC(LogKeys.BLOCK, block)}")
        pushBlock(block)
        logInfo(log"Blocks left to push ${MDC(LogKeys.NUM_BLOCK_IDS, blocksForPushing.size())}")
      }
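One detail worth noting in this hunk: a message spanning two source lines is migrated by concatenating log"..." parts, which works because the interpolator's result supports +, and each variable still gets its own MDC. A hedged sketch of the same shape (MultiPartExample and its parameter are hypothetical):

import org.apache.spark.internal.{Logging, LogKeys, MDC}

object MultiPartExample extends Logging {
  def logDrain(remaining: Int): Unit = {
    // Two log"..." parts concatenated into one structured message.
    logInfo(log"Pushing out the last " +
      log"${MDC(LogKeys.NUM_BLOCK_IDS, remaining)} blocks")
  }
}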