# Example ftrace lines:
# fio-30749 [036] 5651360.257707: block_rq_issue: 8,0 WS 4096 () 1367650688 + 8 [fio]
# <idle>-0 [036] 5651360.257768: block_rq_complete: 8,0 WS () 1367650688 + 8 [0]
import os
import sys
import re
from openpyxl import Workbook
def parse_ftrace_file(file_path):
    """Parse block-layer ftrace output and pair issued requests with completions.

    Two kinds of lines are recognised (everything else is ignored):

        ... 5651360.257707: block_rq_issue: 8,0 WS 4096 () 1367650688 + 8 [fio]
        ... 5651360.257768: block_rq_complete: 8,0 WS () 1367650688 + 8 [0]

    Args:
        file_path: Path of the text file containing the ftrace output.

    Returns:
        A ``(request_details, request_counts)`` tuple.

        ``request_details`` is a list with one tuple per ``block_rq_issue``
        line, in file order:
        ``(operation, lba, size, issue_time, complete_time, latency)``.
        ``lba`` and ``size`` are the two integers around the ``+`` sign,
        times are floats in seconds, and ``complete_time`` / ``latency`` are
        ``None`` when no matching ``block_rq_complete`` line was seen.

        ``request_counts`` maps each operation string (e.g. ``"WS"``) to the
        number of issued requests carrying it.
    """
    # Function-scope import keeps this block self-contained.
    from collections import defaultdict, deque

    issue_pattern = re.compile(
        r'(\d+\.\d+): block_rq_issue: (\d+),(\d+) (\w+) (\d+) \(\) (\d+) \+ (\d+)'
    )
    complete_pattern = re.compile(
        r'(\d+\.\d+): block_rq_complete: (\d+),(\d+) (\w+) \(\) (\d+) \+ (\d+)'
    )

    request_details = []
    request_counts = {}
    # Indices (into request_details) of requests that are issued but not yet
    # completed, keyed by device + operation + LBA + size.  A FIFO per key
    # means repeated I/O to the same LBA is paired in issue order instead of
    # always hitting the first, possibly already completed, record.
    pending = defaultdict(deque)

    with open(file_path, 'r') as f:
        for line in f:
            issue_match = issue_pattern.search(line)
            if issue_match:
                stamp, major, minor, operation, _nbytes, lba, size = issue_match.groups()
                # Parse the timestamp as written so any number of fractional
                # digits (micro- or nanoseconds) is handled correctly.
                issue_time = float(stamp)
                lba = int(lba)
                size = int(size)
                pending[(major, minor, operation, lba, size)].append(len(request_details))
                request_details.append((operation, lba, size, issue_time, None, None))
                request_counts[operation] = request_counts.get(operation, 0) + 1
                continue

            complete_match = complete_pattern.search(line)
            if not complete_match:
                continue
            stamp, major, minor, operation, lba, size = complete_match.groups()
            waiting = pending.get((major, minor, operation, int(lba), int(size)))
            if not waiting:
                # Completion without a recorded issue (e.g. the trace started
                # mid-request): nothing to pair it with.
                continue
            index = waiting.popleft()
            complete_time = float(stamp)
            issued = request_details[index]
            request_details[index] = issued[:4] + (complete_time, complete_time - issued[3])

    return request_details, request_counts
def calculate_percentages(request_counts):
    """Return each operation's share of all requests, in percent.

    Args:
        request_counts: Mapping of operation name to request count.

    Returns:
        A dict with the same keys (in the same order) whose values are
        ``count / total * 100``.  An empty mapping yields an empty dict, and
        when every count is zero each share is reported as ``0.0`` rather
        than raising ``ZeroDivisionError``.
    """
    total_count = sum(request_counts.values())
    if total_count == 0:
        # Nothing was counted: there is no meaningful share to divide out.
        return {operation: 0.0 for operation in request_counts}
    return {
        operation: count / total_count * 100
        for operation, count in request_counts.items()
    }
def save_to_excel(request_details, request_counts, output_file):
    """Write the per-operation summary and the per-request table to a workbook.

    Sheet layout (active worksheet):
      * row 1: ``Operation | Count | Percentage`` header, followed by one row
        per entry of ``request_counts``;
      * directly below: ``Operation | LBA | Size | Issue Time | Complete Time
        | Latency`` header, followed by one row per entry of
        ``request_details``.

    Args:
        request_details: Sequence of 6-item records, written to columns A-F.
        request_counts: Mapping of operation name to request count.
        output_file: Path handed to ``Workbook.save``.
    """
    workbook = Workbook()
    sheet = workbook.active

    # Summary table header.
    for column, title in zip('ABC', ('Operation', 'Count', 'Percentage')):
        sheet[f'{column}1'] = title

    percentages = calculate_percentages(request_counts)
    summary_row = 2
    for operation, count in request_counts.items():
        sheet[f'A{summary_row}'] = operation
        sheet[f'B{summary_row}'] = count
        sheet[f'C{summary_row}'] = percentages[operation]
        summary_row += 1

    # The detail table starts on the row right after the summary rows.
    header_row = len(request_counts) + 2
    detail_columns = 'ABCDEF'
    detail_titles = ('Operation', 'LBA', 'Size', 'Issue Time', 'Complete Time', 'Latency')
    for column, title in zip(detail_columns, detail_titles):
        sheet[f'{column}{header_row}'] = title

    for offset, details in enumerate(request_details, start=1):
        target_row = header_row + offset
        for position, column in enumerate(detail_columns):
            sheet[f'{column}{target_row}'] = details[position]

    workbook.save(output_file)
if __name__ == "__main__":
    # Command-line entry point: parse the given ftrace file and write the
    # results to the given Excel file.
    if len(sys.argv) != 3:
        # The two placeholders were previously fused together
        # ("<path_to_ftrace_file<output_excel_file>").
        print("Usage: python script.py <path_to_ftrace_file> <output_excel_file>")
        sys.exit(1)
    ftrace_file_path = sys.argv[1]
    output_excel_file = sys.argv[2]
    if not os.path.exists(ftrace_file_path):
        print(f"Error: File '{ftrace_file_path}' does not exist.")
        sys.exit(1)
    request_details, request_counts = parse_ftrace_file(ftrace_file_path)
    print(f"Request details: {request_details}")
    print(f"Request counts: {request_counts}")
    save_to_excel(request_details, request_counts, output_excel_file)
    print(f"Results saved to '{output_excel_file}'")
import re
import pandas as pd
# Pandas-based variant: pair block_rq_issue / block_rq_complete events from an
# ftrace dump and write one row per completed request to an Excel file.

# Read the ftrace file contents.
with open("path/to/your/ftrace_file.txt", "r") as f:
    ftrace_output = f.read()

# Regular expression matching block_rq_issue and block_rq_complete events, e.g.
#   5651360.257707: block_rq_issue: 8,0 WS 4096 () 1367650688 + 8 [fio]
#   5651360.257768: block_rq_complete: 8,0 WS () 1367650688 + 8 [0]
# The byte count before "()" only appears on issue lines, so it is optional.
pattern = (
    r"(?P<timestamp>\d+\.\d+):\s+"
    r"(?P<event>block_rq_issue|block_rq_complete):\s+"
    r"(?P<major>\d+),\s*(?P<minor>\d+)\s+"
    r"(?P<rwbs>[A-Z]+)\s+"
    r"(?:(?P<bytes>\d+)\s+)?"
    r"\(\)\s+"
    r"(?P<sector>\d+)\s+\+\s+(?P<nr_sector>\d+)\s+"
    r"\[(?P<comm>[^\]\n]*)\]"
)

# Match the pattern against the ftrace output.
matches = re.finditer(pattern, ftrace_output)

# Initialise state: issued-but-not-completed requests, and finished rows.
request_data = {}
data = []

# Walk the matches and extract the fields of each event.
for match in matches:
    timestamp = match.group("timestamp")
    event_type = match.group("event")
    dev_major = match.group("major")
    dev_minor = match.group("minor")
    rwbs = match.group("rwbs")
    sector = match.group("sector")
    nr_sector = match.group("nr_sector")
    comm = match.group("comm")

    # The bracketed field differs between the issue line ("[fio]") and the
    # completion line ("[0]"), so it must not be part of the matching key.
    request_id = (dev_major, dev_minor, sector, nr_sector)

    if event_type == "block_rq_issue":
        request_data[request_id] = {
            "Timestamp Issue": timestamp,
            "Dev Major": dev_major,
            "Dev Minor": dev_minor,
            "RWBS": rwbs,
            "Sector": sector,
            "Nr Sector": nr_sector,
            "Comm": comm,
        }
    elif event_type == "block_rq_complete":
        issued = request_data.pop(request_id, None)
        if issued is not None:
            issued["Timestamp Complete"] = timestamp
            data.append(issued)

# Convert the list of dicts to a DataFrame.  Explicit columns keep the layout
# stable (and the Latency step valid) even when no request was matched.
df = pd.DataFrame(
    data,
    columns=[
        "Timestamp Issue",
        "Dev Major",
        "Dev Minor",
        "RWBS",
        "Sector",
        "Nr Sector",
        "Comm",
        "Timestamp Complete",
    ],
)

# Compute the latency (seconds) and add it to the DataFrame.
df["Latency"] = df["Timestamp Complete"].astype(float) - df["Timestamp Issue"].astype(float)

# Write the DataFrame to an Excel file.
df.to_excel("output.xlsx", index=False)