系列文章目录

(一)从零开始设计RISC-V处理器——指令系统
(二)从零开始设计RISC-V处理器——单周期处理器的设计
(三)从零开始设计RISC-V处理器——单周期处理器的仿真
(四)从零开始设计RISC-V处理器——ALU的优化
(五)从零开始设计RISC-V处理器——五级流水线之数据通路的设计
(六)从零开始设计RISC-V处理器——五级流水线之控制器的设计
(七)从零开始设计RISC-V处理器——五级流水线之数据冒险
(八)从零开始设计RISC-V处理器——五级流水线之控制冒险
(九)从零开始设计RISC-V处理器——五级流水线之分支计算前移
(十)从零开始设计RISC-V处理器——五级流水线之静态预测



前言

上一篇文章已经介绍了五级流水线的处理器的数据通路的设计,主要是流水线的划分以及流水线寄存器的添加,今天来介绍控制信号的添加以及仿真与调试。


一、控制信号的添加

数据通路中的信号传递如下表所示
在这里插入图片描述
表中标红的信号Rd是上一篇文章中漏掉的,将在本文中补充进去。

将控制信号补充完整之后如下表所示(标红的为控制信号)
在这里插入图片描述
可以看到,首先在译码阶段产生完整的一组控制信号(control模块的输出)
然后将这一组控制信号输入到 ID/EX流水线寄存器组进行延迟一拍。
在执行阶段,需要用到ALUSrc,ALUctl,beq,bne,blt,bge,bltu,bgeu,jal,jalr信号。
但是由于jal和jalr信号在写回阶段需要再次用到,所以将jal和jalr信号随同剩下的信号一起输入到EX/MEM流水线寄存器并延迟一拍。
在访存阶段,要用掉三个信号,MemRead,MemWrite,RW_type,
并将剩下的信号输入到MEM/WB流水线寄存器。
在写回阶段,lui,U_type,MemtoReg用作选择器的选择端,RegWrite输入到寄存器堆。
以上就是控制信号的传递,实际上只需要按照上一篇文章的思路扩展流水线寄存器即可。

二、扩展流水线寄存器

以下直接给出代码,读者可参考上一篇文章的思路自行分析。

1.ID_EX流水线寄存器

`include "define.v"
// ID/EX pipeline register bank.
// Every output is a flop that is cleared asynchronously while rst_n is low
// and otherwise captures the matching *_i input on each rising clk edge.
module id_ex_regs(
	input             clk,
	input             rst_n,

	// datapath values
	input      [31:0] pc_id_ex_i,
	input      [31:0] imme_id_ex_i,
	input      [31:0] Rd_data1_id_ex_i,
	input      [31:0] Rd_data2_id_ex_i,
	input      [4:0]  Rd_id_ex_i,
	output reg [31:0] pc_id_ex_o,
	output reg [31:0] imme_id_ex_o,
	output reg [31:0] Rd_data1_id_ex_o,
	output reg [31:0] Rd_data2_id_ex_o,
	output reg [4:0]  Rd_id_ex_o,

	// control signals in
	input             ALUSrc_id_ex_i,
	input      [3:0]  ALUctl_id_ex_i,
	input             beq_id_ex_i,
	input             bne_id_ex_i,
	input             blt_id_ex_i,
	input             bge_id_ex_i,
	input             bltu_id_ex_i,
	input             bgeu_id_ex_i,
	input             jal_id_ex_i,
	input             jalr_id_ex_i,
	input             MemRead_id_ex_i,
	input             MemWrite_id_ex_i,
	input      [2:0]  RW_type_id_ex_i,
	input             lui_id_ex_i,
	input             U_type_id_ex_i,
	input             MemtoReg_id_ex_i,
	input             RegWrite_id_ex_i,

	// control signals out (one cycle later)
	output reg        ALUSrc_id_ex_o,
	output reg [3:0]  ALUctl_id_ex_o,
	output reg        beq_id_ex_o,
	output reg        bne_id_ex_o,
	output reg        blt_id_ex_o,
	output reg        bge_id_ex_o,
	output reg        bltu_id_ex_o,
	output reg        bgeu_id_ex_o,
	output reg        jal_id_ex_o,
	output reg        jalr_id_ex_o,
	output reg        MemRead_id_ex_o,
	output reg        MemWrite_id_ex_o,
	output reg [2:0]  RW_type_id_ex_o,
	output reg        lui_id_ex_o,
	output reg        U_type_id_ex_o,
	output reg        MemtoReg_id_ex_o,
	output reg        RegWrite_id_ex_o
);

	// Single process for the whole bank: all flops share the same clock,
	// the same active-low asynchronous reset and the same capture rule.
	always @(posedge clk or negedge rst_n) begin
		if (!rst_n) begin
			// datapath values
			pc_id_ex_o       <= `zeroword;
			imme_id_ex_o     <= `zeroword;
			Rd_data1_id_ex_o <= `zeroword;
			Rd_data2_id_ex_o <= `zeroword;
			Rd_id_ex_o       <= 5'd0;
			// control signals
			ALUSrc_id_ex_o   <= `zero;
			ALUctl_id_ex_o   <= 4'b0000;
			beq_id_ex_o      <= `zero;
			bne_id_ex_o      <= `zero;
			blt_id_ex_o      <= `zero;
			bge_id_ex_o      <= `zero;
			bltu_id_ex_o     <= `zero;
			bgeu_id_ex_o     <= `zero;
			jal_id_ex_o      <= `zero;
			jalr_id_ex_o     <= `zero;
			MemRead_id_ex_o  <= `zero;
			MemWrite_id_ex_o <= `zero;
			RW_type_id_ex_o  <= 3'b000;
			lui_id_ex_o      <= `zero;
			U_type_id_ex_o   <= `zero;
			MemtoReg_id_ex_o <= `zero;
			RegWrite_id_ex_o <= `zero;
		end
		else begin
			// datapath values
			pc_id_ex_o       <= pc_id_ex_i;
			imme_id_ex_o     <= imme_id_ex_i;
			Rd_data1_id_ex_o <= Rd_data1_id_ex_i;
			Rd_data2_id_ex_o <= Rd_data2_id_ex_i;
			Rd_id_ex_o       <= Rd_id_ex_i;
			// control signals
			ALUSrc_id_ex_o   <= ALUSrc_id_ex_i;
			ALUctl_id_ex_o   <= ALUctl_id_ex_i;
			beq_id_ex_o      <= beq_id_ex_i;
			bne_id_ex_o      <= bne_id_ex_i;
			blt_id_ex_o      <= blt_id_ex_i;
			bge_id_ex_o      <= bge_id_ex_i;
			bltu_id_ex_o     <= bltu_id_ex_i;
			bgeu_id_ex_o     <= bgeu_id_ex_i;
			jal_id_ex_o      <= jal_id_ex_i;
			jalr_id_ex_o     <= jalr_id_ex_i;
			MemRead_id_ex_o  <= MemRead_id_ex_i;
			MemWrite_id_ex_o <= MemWrite_id_ex_i;
			RW_type_id_ex_o  <= RW_type_id_ex_i;
			lui_id_ex_o      <= lui_id_ex_i;
			U_type_id_ex_o   <= U_type_id_ex_i;
			MemtoReg_id_ex_o <= MemtoReg_id_ex_i;
			RegWrite_id_ex_o <= RegWrite_id_ex_i;
		end
	end

endmodule

2.EX_MEM流水线寄存器

`include "define.v"
// EX/MEM pipeline register bank.
// Every output is a flop that is cleared asynchronously while rst_n is low
// and otherwise captures the matching *_i input on each rising clk edge.
module ex_mem_regs(
	input             clk,
	input             rst_n,

	// datapath values
	input      [31:0] ALU_result_ex_mem_i,
	input      [31:0] pc_jump_ex_mem_i,
	input      [31:0] Rd_data2_ex_mem_i,
	input      [4:0]  Rd_ex_mem_i,
	input      [31:0] imme_ex_mem_i,
	input      [31:0] pc_order_ex_mem_i,
	output reg [31:0] ALU_result_ex_mem_o,
	output reg [31:0] pc_jump_ex_mem_o,
	output reg [31:0] Rd_data2_ex_mem_o,     // DM
	output reg [31:0] imme_ex_mem_o,
	output reg [31:0] pc_order_ex_mem_o,
	output reg [4:0]  Rd_ex_mem_o,

	// control signals in
	input             jal_ex_mem_i,
	input             jalr_ex_mem_i,
	input             MemRead_ex_mem_i,
	input             MemWrite_ex_mem_i,
	input      [2:0]  RW_type_ex_mem_i,
	input             lui_ex_mem_i,
	input             U_type_ex_mem_i,
	input             MemtoReg_ex_mem_i,
	input             RegWrite_ex_mem_i,

	// control signals out (one cycle later)
	output reg        jal_ex_mem_o,
	output reg        jalr_ex_mem_o,
	output reg        MemRead_ex_mem_o,
	output reg        MemWrite_ex_mem_o,
	output reg [2:0]  RW_type_ex_mem_o,
	output reg        lui_ex_mem_o,
	output reg        U_type_ex_mem_o,
	output reg        MemtoReg_ex_mem_o,
	output reg        RegWrite_ex_mem_o
);

	// Single process for the whole bank: all flops share the same clock,
	// the same active-low asynchronous reset and the same capture rule.
	always @(posedge clk or negedge rst_n) begin
		if (!rst_n) begin
			// datapath values
			ALU_result_ex_mem_o <= `zeroword;
			pc_jump_ex_mem_o    <= `zeroword;
			Rd_data2_ex_mem_o   <= `zeroword;
			imme_ex_mem_o       <= `zeroword;
			pc_order_ex_mem_o   <= `zeroword;
			Rd_ex_mem_o         <= 5'd0;
			// control signals
			jal_ex_mem_o        <= `zero;
			jalr_ex_mem_o       <= `zero;
			MemRead_ex_mem_o    <= `zero;
			MemWrite_ex_mem_o   <= `zero;
			RW_type_ex_mem_o    <= 3'b000;
			lui_ex_mem_o        <= `zero;
			U_type_ex_mem_o     <= `zero;
			MemtoReg_ex_mem_o   <= `zero;
			RegWrite_ex_mem_o   <= `zero;
		end
		else begin
			// datapath values
			ALU_result_ex_mem_o <= ALU_result_ex_mem_i;
			pc_jump_ex_mem_o    <= pc_jump_ex_mem_i;
			Rd_data2_ex_mem_o   <= Rd_data2_ex_mem_i;
			imme_ex_mem_o       <= imme_ex_mem_i;
			pc_order_ex_mem_o   <= pc_order_ex_mem_i;
			Rd_ex_mem_o         <= Rd_ex_mem_i;
			// control signals
			jal_ex_mem_o        <= jal_ex_mem_i;
			jalr_ex_mem_o       <= jalr_ex_mem_i;
			MemRead_ex_mem_o    <= MemRead_ex_mem_i;
			MemWrite_ex_mem_o   <= MemWrite_ex_mem_i;
			RW_type_ex_mem_o    <= RW_type_ex_mem_i;
			lui_ex_mem_o        <= lui_ex_mem_i;
			U_type_ex_mem_o     <= U_type_ex_mem_i;
			MemtoReg_ex_mem_o   <= MemtoReg_ex_mem_i;
			RegWrite_ex_mem_o   <= RegWrite_ex_mem_i;
		end
	end

endmodule

3.MEM_WB流水线寄存器

`include "define.v"
// MEM/WB pipeline register bank.
// Every output is a flop that is cleared asynchronously while rst_n is low
// and otherwise captures the matching *_i input on each rising clk edge.
module mem_wb_regs(
	input             clk,
	input             rst_n,

	// datapath values
	input      [31:0] ALU_result_mem_wb_i,
	input      [31:0] pc_jump_mem_wb_i,
	input      [31:0] loaddata_mem_wb_i,     // DM
	input      [31:0] imme_mem_wb_i,
	input      [31:0] pc_order_mem_wb_i,
	input      [4:0]  Rd_mem_wb_i,
	output reg [31:0] ALU_result_mem_wb_o,
	output reg [31:0] pc_jump_mem_wb_o,
	output reg [31:0] loaddata_mem_wb_o,     // DM
	output reg [31:0] imme_mem_wb_o,
	output reg [31:0] pc_order_mem_wb_o,
	output reg [4:0]  Rd_mem_wb_o,

	// control signals in
	input             jal_mem_wb_i,
	input             jalr_mem_wb_i,
	input             lui_mem_wb_i,
	input             U_type_mem_wb_i,
	input             MemtoReg_mem_wb_i,
	input             RegWrite_mem_wb_i,

	// control signals out (one cycle later)
	output reg        jal_mem_wb_o,
	output reg        jalr_mem_wb_o,
	output reg        lui_mem_wb_o,
	output reg        U_type_mem_wb_o,
	output reg        MemtoReg_mem_wb_o,
	output reg        RegWrite_mem_wb_o
);

	// Single process for the whole bank: all flops share the same clock,
	// the same active-low asynchronous reset and the same capture rule.
	always @(posedge clk or negedge rst_n) begin
		if (!rst_n) begin
			// datapath values
			ALU_result_mem_wb_o <= `zeroword;
			pc_jump_mem_wb_o    <= `zeroword;
			loaddata_mem_wb_o   <= `zeroword;
			imme_mem_wb_o       <= `zeroword;
			pc_order_mem_wb_o   <= `zeroword;
			Rd_mem_wb_o         <= 5'd0;
			// control signals
			jal_mem_wb_o        <= `zero;
			jalr_mem_wb_o       <= `zero;
			lui_mem_wb_o        <= `zero;
			U_type_mem_wb_o     <= `zero;
			MemtoReg_mem_wb_o   <= `zero;
			RegWrite_mem_wb_o   <= `zero;
		end
		else begin
			// datapath values
			ALU_result_mem_wb_o <= ALU_result_mem_wb_i;
			pc_jump_mem_wb_o    <= pc_jump_mem_wb_i;
			loaddata_mem_wb_o   <= loaddata_mem_wb_i;
			imme_mem_wb_o       <= imme_mem_wb_i;
			pc_order_mem_wb_o   <= pc_order_mem_wb_i;
			Rd_mem_wb_o         <= Rd_mem_wb_i;
			// control signals
			jal_mem_wb_o        <= jal_mem_wb_i;
			jalr_mem_wb_o       <= jalr_mem_wb_i;
			lui_mem_wb_o        <= lui_mem_wb_i;
			U_type_mem_wb_o     <= U_type_mem_wb_i;
			MemtoReg_mem_wb_o   <= MemtoReg_mem_wb_i;
			RegWrite_mem_wb_o   <= RegWrite_mem_wb_i;
		end
	end

endmodule

三、顶层模块的设计

顶层模块的改动包括3点:
1.将流水线寄存器替换
2.将各个模块的控制信号接入
3.留意几个特殊的信号:
(1).访存阶段的控制信号R_en,W_en,RW_type要输出到CPU外部的数据存储器。之前这组信号直接从control模块输出,现在需要将其延迟两拍(依次经过ID/EX和EX/MEM流水线寄存器),从EX/MEM流水线寄存器输出。也就是说,在顶层模块中这组信号是从数据通路中输出来的,而不是从控制器模块输出来的。
(2).译码阶段的寄存器堆模块的输入信号,Wr_reg_data和RegWrite,均来自写回阶段。

数据通路的代码如下:

// Five-stage pipelined datapath: IF -> IF/ID -> ID -> ID/EX -> EX -> EX/MEM
// -> MEM/WB -> WB.
//
// The control inputs below are registered in ID/EX and then forwarded from
// one pipeline register bank to the next. The memory-access controls leave
// this module from the EX/MEM bank, and the register-file write enable,
// write address and write data come back from the MEM/WB bank / WB stage.
module datapath(
	input 	clk,
	input   rst_n,
	input   [31:0]instr,          // fed to the IF/ID register bank


	// control signals, registered by the ID/EX bank
	input   MemtoReg,
	input   ALUSrc,
	input   RegWrite,
	input   lui,
	input   U_type,
	input   jal,
	input   jalr,
	input   beq,
	input   bne,
	input   blt,
	input   bge,
	input   bltu,
	input   bgeu,
	input   [3:0]ALUctl,
	input  [2:0]RW_type,
	input   MemRead,
	input   MemWrite,
	
	input [31:0]loaddata,          // fed to the MEM/WB register bank
	
	
	// memory-access controls taken from the EX/MEM register bank
	output  MemRead_ex_mem_o,
	output  MemWrite_ex_mem_o,
	output [2:0] RW_type_ex_mem_o,
	
	output  [7:0]rom_addr,
	output [31:0]Wr_mem_data,      // EX/MEM-registered Rd_data2
	output [31:0]ALU_result_ex_mem_o,
	output [6:0]opcode,
	output [2:0]func3,
	output func7
	
);

	// ---------------- IF stage ----------------
	wire [31:0]pc_if_i;
	wire [31:0]pc_if_o;

	// ---------------- IF/ID register outputs ----------------
	wire [31:0]pc_if_id_o;
	wire [31:0]instr_if_id_o;

	// ---------------- ID stage outputs ----------------
	wire [31:0]imme_id_o;
	wire [31:0]Rd_data1_id_o;
	wire [31:0]Rd_data2_id_o;
	wire [4:0]Rd_id_o;
	
	// ---------------- ID/EX register outputs ----------------
	wire [31:0]pc_id_ex_o;
	wire [31:0]imme_id_ex_o;
	wire [31:0]Rd_data1_id_ex_o;
	wire [31:0]Rd_data2_id_ex_o;
	wire [4:0]Rd_id_ex_o;
	
	wire ALUSrc_id_ex_o;
	wire [3:0]ALUctl_id_ex_o;
	wire beq_id_ex_o;
	wire bne_id_ex_o;
	wire blt_id_ex_o;
	wire bge_id_ex_o;
	wire bltu_id_ex_o;
	wire bgeu_id_ex_o;
	wire jal_id_ex_o;
	wire jalr_id_ex_o;
	wire MemRead_id_ex_o;
	wire MemWrite_id_ex_o;
	wire [2:0]RW_type_id_ex_o;
	wire lui_id_ex_o;
	wire U_type_id_ex_o;
	wire MemtoReg_id_ex_o;
	wire RegWrite_id_ex_o;
	
	// ---------------- EX stage outputs ----------------
	wire [31:0] ALU_result_ex_o;
	wire [31:0] pc_jump_o;
	wire [31:0] imme_ex_o;
	wire [31:0] pc_order_ex_o;
	
	// ---------------- EX/MEM register outputs ----------------
	// ALU_result_ex_mem_o, MemRead_ex_mem_o, MemWrite_ex_mem_o and
	// RW_type_ex_mem_o are module output ports, so they are not
	// re-declared as wires here.
	wire [31:0] pc_jump_ex_mem_o;
	// Declared but not connected in this module: the EX/MEM Rd_data2
	// output drives the Wr_mem_data port directly.
	wire [31:0] Rd_data2_ex_mem_o;
	wire [31:0] imme_ex_mem_o;
	wire [31:0] pc_order_ex_mem_o;
	wire [4:0]Rd_ex_mem_o;

	wire  jal_ex_mem_o;
	wire  jalr_ex_mem_o;
	wire  lui_ex_mem_o;
	wire  U_type_ex_mem_o;
	wire  MemtoReg_ex_mem_o;
	wire  RegWrite_ex_mem_o;
	
	// ---------------- MEM/WB register outputs ----------------
	wire [31:0] ALU_result_mem_wb_o;
	wire [31:0] pc_jump_mem_wb_o;
	wire [31:0] loaddata_mem_wb_o;
	wire [31:0] imme_mem_wb_o;
	wire [31:0] pc_order_mem_wb_o;
	wire [4:0] Rd_mem_wb_o;
	
	wire  jal_mem_wb_o;
	wire  jalr_mem_wb_o;
	wire  lui_mem_wb_o;
	wire  U_type_mem_wb_o;
	wire  MemtoReg_mem_wb_o;
	wire  RegWrite_mem_wb_o;
	
	// ---------------- WB stage output ----------------
	wire [31:0]Wr_reg_data_wb_o;
	
	
// Instruction fetch; pc_if_i is driven by pc_new_ex_o of the EX stage.
if_stage if_stage_inst (
    .clk(clk), 
    .rst_n(rst_n), 
    .pc_if_i(pc_if_i), 
    .pc_if_o(pc_if_o), 
    .rom_addr(rom_addr)
    );

if_id_regs  if_id_regs_inst(
    .clk(clk), 
    .rst_n(rst_n), 
    .pc_if_id_i(pc_if_o), 
    .instr_if_id_i(instr), 
    .pc_if_id_o(pc_if_id_o), 
    .instr_if_id_o(instr_if_id_o)
    );

// Decode stage; its write-side inputs come from the MEM/WB bank and the
// WB stage, not from the instruction currently being decoded.
id_stage id_stage_inst (
    .clk(clk), 
    .rst_n(rst_n), 
    .RegWrite_id_i(RegWrite_mem_wb_o), // WB stage signal
	.Rd_id_i(Rd_mem_wb_o),
    .Wr_reg_data_id_i(Wr_reg_data_wb_o), 
    .instr_id_i(instr_if_id_o), 
    .opcode_id_o(opcode), 
    .func3_id_o(func3), 
    .func7_id_o(func7), 
    .imme_id_o(imme_id_o), 
    .Rd_data1_id_o(Rd_data1_id_o), 
    .Rd_data2_id_o(Rd_data2_id_o),
	.Rd_id_o(Rd_id_o)
    );

id_ex_regs id_ex_regs_inst (
    .clk(clk), 
    .rst_n(rst_n), 
    .pc_id_ex_i(pc_if_id_o), 
    .imme_id_ex_i(imme_id_o), 
    .Rd_data1_id_ex_i(Rd_data1_id_o), 
    .Rd_data2_id_ex_i(Rd_data2_id_o), 
	.Rd_id_ex_i(Rd_id_o),
    .pc_id_ex_o(pc_id_ex_o), 
    .imme_id_ex_o(imme_id_ex_o), 
    .Rd_data1_id_ex_o(Rd_data1_id_ex_o), 
    .Rd_data2_id_ex_o(Rd_data2_id_ex_o),
	.Rd_id_ex_o(Rd_id_ex_o),
	// control signals
	.ALUSrc_id_ex_i(ALUSrc), 
    .ALUctl_id_ex_i(ALUctl), 
    .beq_id_ex_i(beq), 
    .bne_id_ex_i(bne), 
    .blt_id_ex_i(blt), 
    .bge_id_ex_i(bge), 
    .bltu_id_ex_i(bltu), 
    .bgeu_id_ex_i(bgeu), 
    .jal_id_ex_i(jal), 
    .jalr_id_ex_i(jalr), 
    .MemRead_id_ex_i(MemRead), 
    .MemWrite_id_ex_i(MemWrite), 
    .RW_type_id_ex_i(RW_type), 
    .lui_id_ex_i(lui), 
    .U_type_id_ex_i(U_type), 
    .MemtoReg_id_ex_i(MemtoReg), 
    .RegWrite_id_ex_i(RegWrite), 
    .ALUSrc_id_ex_o(ALUSrc_id_ex_o), 
    .ALUctl_id_ex_o(ALUctl_id_ex_o), 
    .beq_id_ex_o(beq_id_ex_o), 
    .bne_id_ex_o(bne_id_ex_o), 
    .blt_id_ex_o(blt_id_ex_o), 
    .bge_id_ex_o(bge_id_ex_o), 
    .bltu_id_ex_o(bltu_id_ex_o), 
    .bgeu_id_ex_o(bgeu_id_ex_o), 
    .jal_id_ex_o(jal_id_ex_o), 
    .jalr_id_ex_o(jalr_id_ex_o), 
    .MemRead_id_ex_o(MemRead_id_ex_o), 
    .MemWrite_id_ex_o(MemWrite_id_ex_o), 
    .RW_type_id_ex_o(RW_type_id_ex_o), 
    .lui_id_ex_o(lui_id_ex_o), 
    .U_type_id_ex_o(U_type_id_ex_o), 
    .MemtoReg_id_ex_o(MemtoReg_id_ex_o), 
    .RegWrite_id_ex_o(RegWrite_id_ex_o)
    );

// Execute stage; consumes the ALU, branch and jump controls from ID/EX.
ex_stage ex_stage_inst (
    .ALUctl_ex_i(ALUctl_id_ex_o), 
    .beq_ex_i(beq_id_ex_o), 
    .bne_ex_i(bne_id_ex_o), 
    .blt_ex_i(blt_id_ex_o), 
    .bge_ex_i(bge_id_ex_o), 
    .bltu_ex_i(bltu_id_ex_o), 
    .bgeu_ex_i(bgeu_id_ex_o), 
    .jal_ex_i(jal_id_ex_o), 
    .jalr_ex_i(jalr_id_ex_o), 
    .ALUSrc_ex_i(ALUSrc_id_ex_o), 
    .pc_ex_i(pc_id_ex_o), 
    .imme_ex_i(imme_id_ex_o), 
    .Rd_data1_ex_i(Rd_data1_id_ex_o), 
    .Rd_data2_ex_i(Rd_data2_id_ex_o), 
    .ALU_result_ex_o(ALU_result_ex_o), 
    .pc_new_ex_o(pc_if_i), 
    .pc_jump_o(pc_jump_o), 
    .imme_ex_o(imme_ex_o), 
    .pc_order_ex_o(pc_order_ex_o)
    );
	
ex_mem_regs ex_mem_regs_inst (
    .clk(clk), 
    .rst_n(rst_n), 
    .ALU_result_ex_mem_i(ALU_result_ex_o), 
    .pc_jump_ex_mem_i(pc_jump_o), 
    .Rd_data2_ex_mem_i(Rd_data2_id_ex_o), 
    .imme_ex_mem_i(imme_ex_o), 
    .pc_order_ex_mem_i(pc_order_ex_o), 
	.Rd_ex_mem_i(Rd_id_ex_o),
    .ALU_result_ex_mem_o(ALU_result_ex_mem_o), 
    .pc_jump_ex_mem_o(pc_jump_ex_mem_o), 
    .Rd_data2_ex_mem_o(Wr_mem_data), 
    .imme_ex_mem_o(imme_ex_mem_o), 
    .pc_order_ex_mem_o(pc_order_ex_mem_o),
	.Rd_ex_mem_o(Rd_ex_mem_o),
	// control signals
	.jal_ex_mem_i(jal_id_ex_o), 
    .jalr_ex_mem_i(jalr_id_ex_o), 
    .MemRead_ex_mem_i(MemRead_id_ex_o), 
    .MemWrite_ex_mem_i(MemWrite_id_ex_o), 
    .RW_type_ex_mem_i(RW_type_id_ex_o), 
    .lui_ex_mem_i(lui_id_ex_o), 
    .U_type_ex_mem_i(U_type_id_ex_o), 
    .MemtoReg_ex_mem_i(MemtoReg_id_ex_o), 
    .RegWrite_ex_mem_i(RegWrite_id_ex_o), 
    .jal_ex_mem_o(jal_ex_mem_o), 
    .jalr_ex_mem_o(jalr_ex_mem_o), 
    .MemRead_ex_mem_o(MemRead_ex_mem_o),   // output control
    .MemWrite_ex_mem_o(MemWrite_ex_mem_o), // output control
    .RW_type_ex_mem_o(RW_type_ex_mem_o),   // output control
    .lui_ex_mem_o(lui_ex_mem_o), 
    .U_type_ex_mem_o(U_type_ex_mem_o), 
    .MemtoReg_ex_mem_o(MemtoReg_ex_mem_o), 
    .RegWrite_ex_mem_o(RegWrite_ex_mem_o)
    );

mem_wb_regs mem_wb_regs_inst (
	.clk(clk),
	.rst_n(rst_n),
    .ALU_result_mem_wb_i(ALU_result_ex_mem_o), 
    .pc_jump_mem_wb_i(pc_jump_ex_mem_o), 
    .loaddata_mem_wb_i(loaddata), 
    .imme_mem_wb_i(imme_ex_mem_o), 
    .pc_order_mem_wb_i(pc_order_ex_mem_o), 
	.Rd_mem_wb_i(Rd_ex_mem_o),
    .ALU_result_mem_wb_o(ALU_result_mem_wb_o), 
    .pc_jump_mem_wb_o(pc_jump_mem_wb_o), 
    .loaddata_mem_wb_o(loaddata_mem_wb_o), 
    .imme_mem_wb_o(imme_mem_wb_o), 
    .pc_order_mem_wb_o(pc_order_mem_wb_o),
	.Rd_mem_wb_o(Rd_mem_wb_o),
	// control signals
	.jal_mem_wb_i(jal_ex_mem_o), 
    .jalr_mem_wb_i(jalr_ex_mem_o), 
    .lui_mem_wb_i(lui_ex_mem_o), 
    .U_type_mem_wb_i(U_type_ex_mem_o), 
    .MemtoReg_mem_wb_i(MemtoReg_ex_mem_o), 
    .RegWrite_mem_wb_i(RegWrite_ex_mem_o), 
    .jal_mem_wb_o(jal_mem_wb_o), 
    .jalr_mem_wb_o(jalr_mem_wb_o), 
    .lui_mem_wb_o(lui_mem_wb_o), 
    .U_type_mem_wb_o(U_type_mem_wb_o), 
    .MemtoReg_mem_wb_o(MemtoReg_mem_wb_o), 
    .RegWrite_mem_wb_o(RegWrite_mem_wb_o)
    );
	
// Write-back stage; produces the register-file write data consumed by
// id_stage_inst above.
wb_stage wb_stage_inst (
    .MemtoReg(MemtoReg_mem_wb_o), 
    .jal(jal_mem_wb_o), 
    .jalr(jalr_mem_wb_o), 
    .lui(lui_mem_wb_o), 
    .U_type(U_type_mem_wb_o), 
    .ALU_result_wb_i(ALU_result_mem_wb_o), 
    .pc_jump_wb_i(pc_jump_mem_wb_o), 
    .loaddata_wb_i(loaddata_mem_wb_o), 
    .imme_wb_i(imme_mem_wb_o), 
    .pc_order_wb_i(pc_order_mem_wb_o), 
    .Wr_reg_data_wb_o(Wr_reg_data_wb_o)
    );
	
endmodule

加入控制器后的顶层模块如下:


// CPU top level: connects the control unit to the pipelined datapath.
//
// The control unit decodes opcode/func3/func7, which the datapath outputs.
// The memory-interface controls (R_en, W_en, RW_type) are driven by the
// datapath's EX/MEM-registered outputs rather than directly by the control
// unit.
module riscv(
	input clk,
	input rst_n,
	input [31:0]instr,
	input [31:0]Rd_mem_data,      // connected to the datapath loaddata input
	
	output [7:0]rom_addr,
	
	output [31:0]Wr_mem_data,
	output W_en,
	output R_en,
	output [31:0]ram_addr,        // driven by ALU_result_ex_mem_o
	output [2:0]RW_type    
    );
	
	// instruction fields from the datapath to the control unit
	wire [6:0]opcode;
	wire [2:0]func3;
	wire func7;
	// control signals from the control unit to the datapath
	wire MemtoReg;
	wire ALUSrc;
	wire RegWrite;
	wire lui;
	wire U_type;
	wire jal;
	wire jalr;
	wire beq;
	wire bne;
	wire blt;
	wire bge;
	wire bltu;
	wire bgeu;
	wire [3:0]ALUctl;
	wire MemWrite;
	wire MemRead;
	wire [2:0]RW_type_id;         // control-unit RW_type, before pipelining
	
	
	control control_inst (
    .opcode(opcode), 
    .func3(func3), 
    .func7(func7), 
    .MemRead(MemRead), 
    .MemtoReg(MemtoReg), 
    .MemWrite(MemWrite), 
    .ALUSrc(ALUSrc), 
    .RegWrite(RegWrite), 
	.lui(lui),
	.U_type(U_type),
    .jal(jal), 
    .jalr(jalr), 
    .beq(beq), 
    .bne(bne), 
    .blt(blt), 
    .bge(bge), 
    .bltu(bltu), 
    .bgeu(bgeu), 
    .RW_type(RW_type_id), 
    .ALUctl(ALUctl)
    );
	
	datapath datapath_inst (
    .clk(clk), 
    .rst_n(rst_n), 
    .instr(instr), 
    .MemtoReg(MemtoReg), 
    .ALUSrc(ALUSrc), 
    .RegWrite(RegWrite), 
	.lui(lui),
	.U_type(U_type),
    .jal(jal), 
    .jalr(jalr), 
    .beq(beq), 
    .bne(bne), 
    .blt(blt), 
    .bge(bge), 
    .bltu(bltu), 
    .bgeu(bgeu), 
    .ALUctl(ALUctl), 
	.MemRead(MemRead), 
    .MemWrite(MemWrite), 
    .RW_type(RW_type_id), 
	.MemRead_ex_mem_o(R_en),      // output control
    .MemWrite_ex_mem_o(W_en),     // output control
    .RW_type_ex_mem_o(RW_type),   // output control
    .loaddata(Rd_mem_data), 
    .rom_addr(rom_addr), 
    .Wr_mem_data(Wr_mem_data),
	.ALU_result_ex_mem_o(ram_addr),
	.opcode(opcode),
	.func3(func3),
	.func7(func7)
    );

endmodule

其他的代码块基本没有改动。
改动过后的完整代码列表:
在这里插入图片描述
综合后查看RTL视图:
在这里插入图片描述

四、仿真与调试

现将仿真与测试过程中遇到的问题总结如下:

1.五级流水线每隔3个时钟周期,pc才更新一次
原因:取指阶段的pc刚开始是0,IF/ID流水线寄存器刚开始也是0,译码阶段也是0……
这就导致前几个时钟周期内,EX阶段的pc+4的结果连续都是4,进而导致连续3个8,连续3个12……
在这里插入图片描述

解决方案:
一开始设计流水线阶段的时候,其实进入了一个误区
为什么要把pc+4这一操作延迟到执行阶段呢
其实是受到pc+imme的误导,由于imme在译码阶段才产生,所以pc+imme要等到执行阶段才能产生。但是pc+4是没必要等待的。
因此将pc+4这一操作提前到取指阶段即可解决问题(换句话说,pc+4中的pc应当是取指阶段的pc,而不是执行阶段的pc)。

修改之后如下所示:
在这里插入图片描述
2.测试第一组指令

# Test group 1: addi / add / sub.
# The values in the comments are the architecturally expected results; the
# add instructions read registers written only a few instructions earlier.
addi x1,x0,1      # x1 = 1
addi x2,x0,2      # x2 = 2
addi x3,x0,3      # x3 = 3
addi x4,x0,4      # x4 = 4
addi x5,x0,5      # x5 = 5
add x6,x3,x3      # expected x6 = 6
add x7,x3,x4      # expected x7 = 7
add x8,x3,x5      # expected x8 = 8
sub x9,x0,x5      # expected x9 = -5
sub x10,x5,x4     # expected x10 = 1

执行结果如下:
在这里插入图片描述
可以看到add指令是有问题的,不能正确执行
经过分析,可以看到,执行结果错误的原因是发生了数据冒险。
简单来说就是,由于流水线机制,前面的指令的执行结果还没有来得及写回寄存器堆,后面的指令就已经需要使用这个结果,这个时候从寄存器堆读出来的值并不是最新的值。比如add x6,x3,x3,前面指令的执行结果应使x3为3,但是在译码的时候,读出x3的值并不是3而是0,因此x6的执行结果是0而不是6。
数据冒险的内容将在下一篇文章中集中讨论,此处暂且忽略。

3.测试第2组指令

# Test group 2: upper-immediate instructions.
lui x1,0xfffff     # expected x1 = 0xfffff000
auipc x2,0xfff     # expected x2 = pc of this instruction + 0x00fff000

执行结果如下:
在这里插入图片描述
4.测试第3组指令

# Test group 3: byte / halfword / word stores and loads, signed and unsigned.
# NOTE(review): operands appear to be written as "reg,offset,base"
# (i.e. offset(base) with base = x0) -- confirm against the assembler used.
addi x1,x0,0x70
addi x2,x0,0x71
addi x3,x0,0xf0
addi x4,x0,0xf1
sb x1,0,x0         # byte stores to offsets 0..3
slli x1,x1,24      # x1 = 0x70000000
sb x2,1,x0
sb x3,2,x0
sb x4,3,x0
add x5,x1,x4       # x5 = 0x700000f1
lb x6,0,x0         # sign-extending byte loads from offsets 0..3
lb x7,1,x0
lb x8,2,x0
lb x9,3,x0
sw x5,4,x0         # word store to offset 4
sh x5,8,x0         # halfword store to offset 8
lbu x11,0,x0       # zero-extending byte loads from offsets 0..3
lbu x12,1,x0
lbu x13,2,x0
lbu x14,3,x0
lh x16,0,x0        # sign-extending halfword loads
lh x17,1,x0        # NOTE(review): odd offset -- result depends on how the data memory handles misaligned halfwords
lh x18,2,x0
lh x19,3,x0        # NOTE(review): odd offset, see above
lh x20,8,x0
lhu x21,0,x0       # zero-extending halfword loads
lhu x22,1,x0       # NOTE(review): odd offset, see above
lhu x23,2,x0
lhu x24,3,x0       # NOTE(review): odd offset, see above
lw x25,4,x0        # word load from offset 4

执行结果如下:
在这里插入图片描述
在这里插入图片描述

5.测试第4组指令

# Test group 4: bitwise logic, immediate and register forms.
addi x1,x0,0b11001010    # x1 = 0xca
addi x2,x0,0b00111010    # x2 = 0x3a
addi x3,x0,0b00110101    # x3 = 0x35
addi x4,x0,0b11110000    # x4 = 0xf0
andi x5,x1,0b00111010    # expected x5 = 0x0a
ori x6,x1,0b00111010     # expected x6 = 0xfa
xori x7,x1,0b00110101    # expected x7 = 0xff
and x8,x1,x2             # expected x8 = 0x0a
or x9,x1,x2              # expected x9 = 0xfa
xor x10,x1,x3            # expected x10 = 0xff

执行结果如下:
在这里插入图片描述

6.测试第5组指令

# Test group 5: signed and unsigned set-less-than, register and immediate forms.
# Expected values follow RV32I semantics (sltiu sign-extends the immediate,
# then compares as unsigned).
addi x1,x0,-1      # x1 = 0xffffffff
addi x2,x0,-2      # x2 = 0xfffffffe
addi x3,x0,3
addi x4,x0,4
addi x21,x0,21     # x21/x22 are not read below; presumably padding so the
addi x22,x0,22     # compares do not immediately follow the writes they read
slt x5,x1,x2       # -1 < -2            -> 0
slt x6,x2,x1       # -2 < -1            -> 1
slt x7,x3,x4       #  3 <  4            -> 1
slt x8,x4,x3       #  4 <  3            -> 0
sltu x9,x1,x3      # 0xffffffff < 3     -> 0
sltu x10,x3,x1     # 3 < 0xffffffff     -> 1
sltu x11,x1,x2     # 0xffffffff < 0xfffffffe -> 0
slti x12,x3,-3     #  3 < -3            -> 0
slti x13,x3,4      #  3 <  4            -> 1
slti x14,x2,-1     # -2 < -1            -> 1
slti x15,x2,-4     # -2 < -4            -> 0
sltiu x16,x3,-3    # 3 < 0xfffffffd     -> 1
sltiu x17,x3,4     # 3 < 4              -> 1
sltiu x18,x2,-1    # 0xfffffffe < 0xffffffff -> 1
sltiu x19,x2,-4    # 0xfffffffe < 0xfffffffc -> 0


执行结果如下:
在这里插入图片描述

7.测试第6组指令

# Test group 6: logical and arithmetic shifts, register and immediate forms,
# on a positive value (x1) and a negative value (x6).
addi x1,x0,0xff    # x1 = 0x000000ff
addi x2,x0,4       # shift amount
addi x6,x0,-0xff   # x6 = 0xffffff01
addi x21,x0,21     # x21..x23 are not read below; presumably padding so the
addi x22,x0,22     # shifts do not immediately follow the writes they read
addi x23,x0,23

# register shift amount
sll x3,x1,x2
srl x4,x1,x2
sra x5,x1,x2
sll x7,x6,x2
srl x8,x6,x2       # logical: zeros shifted in from the top
sra x9,x6,x2       # arithmetic: sign bit shifted in from the top

# immediate shift amount, positive operand
slli x11,x1,4
srli x12,x1,4
srai x13,x1,4

# immediate shift amount, negative operand (overwrites x11..x13)
slli x11,x6,4
srli x12,x6,4
srai x13,x6,4


执行结果如下:
在这里插入图片描述
除了跳转指令以外,其他的指令都已经测试正确。跳转指令涉及到控制冒险,将在后续的设计中进行测试。

总结

以上就是简单的五级流水线的设计,之所以说是“简单”的五级流水线设计,是因为这个设计已经有了流水线的结构,但是还有很多地方有待进一步完善,比如上面提到的数据冒险和控制冒险。
现在的这个五级流水线的处理器已经能够正确地运行指令,但是在编写测试代码的时候,需要注意避开数据冒险。最简单的方法就是:以寄存器a作为源寄存器的指令,与它前面最近一条以寄存器a作为目的寄存器的指令之间,至少要间隔3条指令。
至于为什么是3条,我们在下一篇文章再深入讨论。

Logo

华为开发者空间,是为全球开发者打造的专属开发空间,汇聚了华为优质开发资源及工具,致力于让每一位开发者拥有一台云主机,基于华为根生态开发、创新。

更多推荐