// idct: state-machine driven two-pass multiply-accumulate engine
// (presumably an 8x8 inverse DCT, judging by the module name - confirm).
//
// Operation as implemented in this module:
//   1. On 'start', 12-bit samples from 'din' are paired two per word and
//      written into the external X RAM.
//   2. First pass: coefficient words (C ROM) are multiplied with X RAM words,
//      accumulated in temp25a/temp25b and written to the transpose (T) RAM.
//   3. Second pass: coefficient words are multiplied with T RAM words,
//      accumulated in temp29 and written to the output (O) RAM.
//   4. After a second 'start', the O RAM is read back onto 'dout' while
//      'done' is high.
//
// 'reset' is synchronous and active high. All memories are external; the
// RAM data buses are bidirectional and are only driven here while the
// matching *_we is asserted.
module idct (clk, reset, start, done, din, dout,
c_addr, c_data,
x_addr, x_data, x_we,
t_addr, t_data, t_we,
o_addr, o_data, o_we
);
input clk, reset, start;
input [11:0] din;   // input sample stream, one 12-bit sample per clock
output done;        // high while results are being unloaded
output [8:0] dout;  // registered copy of the O RAM data bus during unload
 
// C COEFFICIENTS ROM INTERFACE (26x32)
// Each word is used as two 13-bit coefficients: [25:13] and [12:0].
input [25:0] c_data ;
output [4:0] c_addr ;
 
// X (D_IN) RAM INTERFACE (24x32)
// Each word holds two 12-bit samples: {previous din, current din}.
inout [23:0] x_data ;
wire [23:0] x_data_out ;  // data read from the X RAM (alias of x_data)
output [4:0] x_addr ;
output x_we ;
wire x_we ;
wire [23:0] x_data ;
wire [23:0] x_data_in ;   // data driven onto x_data while x_we is high
 
// TRANSPOSE RAM INTERFACE (32x32)
// Each word holds two 16-bit first-pass results: [31:16] and [15:0].
inout [31:0] t_data ;
output [4:0] t_addr ;
output t_we ;
wire t_we;
wire [31:0] t_data ;
wire [31:0] t_data_out ;  // data read from the T RAM (alias of t_data)
wire [31:0] t_data_in;    // data driven onto t_data while t_we is high
 
 
// OUTPUT RAM INTERFACE (9x64)
inout [8:0] o_data ;
output [5:0] o_addr ;
output o_we ;
wire [8:0] o_data ;
wire [8:0] o_data_out;    // alias of o_data; not read anywhere in this module
wire [8:0] o_data_in;     // data driven onto o_data while o_we is high
wire o_we ;
 
reg [8:0] dout;
wire done;
reg [1:0] k,j;            // k: innermost loop index, j: first-pass column-pair index
reg [2:0] i,j2;           // i: outer loop index, j2: second-pass / unload inner index
reg [11:0] temp12;        // din delayed by one clock
reg [12:0] temp13a;       // not referenced anywhere in this module
reg [12:0] temp13b;       // not referenced anywhere in this module
reg [23:0] temp24;        // last word read from the X RAM
reg [24:0] temp25a;       // first-pass accumulator (written to t_data[31:16])
reg [24:0] temp25b;       // first-pass accumulator (written to t_data[15:0])
reg [25:0] temp26;        // last coefficient word read from the C ROM
reg [28:0] temp29;        // second-pass accumulator
reg [31:0] temp32;        // last word read from the T RAM
 
function [24:0] mult13_12;
  // Two's-complement multiply: 13-bit signed A times 12-bit signed B,
  // returned as a 25-bit two's-complement product.
  //
  // Implemented as sign/magnitude: take |A| and |B|, multiply unsigned,
  // then negate the product when exactly one operand was negative.
  // The original comments state that this behavioural body is for
  // simulation only and that the port names A and B follow DesignWare
  // naming, so the port names must not be changed.
  input [12:0] A;
  input [11:0] B;
  reg   [12:0] mag_a;   // |A|
  reg   [11:0] mag_b;   // |B|
  reg          negate;  // set when the operand signs differ
  begin
    negate = A[12] ^ B[11];

    mag_a = A;
    if (A[12]) mag_a = -A;

    mag_b = B;
    if (B[11]) mag_b = -B;

    mult13_12 = mag_a * mag_b;
    if (negate) mult13_12 = -mult13_12;
  end
endfunction
 
function [28:0] mult13_16;
  // Two's-complement multiply: 13-bit signed A times 16-bit signed B,
  // returned as a 29-bit two's-complement product.
  //
  // The return range is 29 bits (13 + 16) so that the full product fits
  // and so that a negative product is a correctly sign-extended value
  // when it is added into a 29-bit accumulator. A 25-bit return value
  // would drop the upper product bits and would be zero-extended (not
  // sign-extended) in a wider expression.
  //
  // Implemented as sign/magnitude; the original comments state that this
  // behavioural body is for simulation only and that the port names A and
  // B follow DesignWare naming, so the port names are kept.
  input [12:0] A;
  input [15:0] B;
  reg   [12:0] mag_a;   // |A|
  reg   [15:0] mag_b;   // |B|
  reg          sgn;     // set when the operand signs differ
  begin
    sgn = A[12] ^ B[15];

    mag_a = A;
    if (A[12] == 1'b1) mag_a = ~A + 1'b1;

    mag_b = B;
    if (B[15] == 1'b1) mag_b = ~B + 1'b1;

    // Evaluated in the 29-bit context of the function result.
    mult13_16 = mag_a * mag_b;
    if (sgn == 1'b1) mult13_16 = ~mult13_16 + 1'b1;
  end
endfunction
 
 
//-----------------------------------------------------------------
// Main sequencer: current state (registered) and next state (combinational).
reg [4:0] main_state ;
reg [4:0] main_next_state ;
 
// State encoding. Values 0..17 are used; the 5-bit state register leaves
// 18..31 unassigned. Note that wrt_x_ram_1 (17) is declared out of numeric
// order. The declaration order is left untouched because it is visible to
// positional parameter overrides.
parameter
main_idle = 0 ,              // wait for start
load_x_ram = 1 ,             // clear i/j/k before loading the X RAM
wrt_x_ram = 2 ,              // advance j (and i) after an X RAM write
wrt_x_ram_1 = 17 ,           // X RAM write strobe (x_we high)
transpose = 3 ,              // clear i/j/k before the first pass
wait4_trans_incrk = 4 ,      // first pass: advance k, write T RAM when k==3
read_crom = 5 ,              // first pass: latch coefficient word
read_xram1 = 6 ,             // first pass: latch first X RAM word
multiply_add1 = 7 ,          // first pass: accumulate into temp25a
read_xram2 = 8 ,             // first pass: latch second X RAM word
multiply_add2 = 9 ,          // first pass: accumulate into temp25b
multiply = 10 ,              // clear i/j2/k and temp29 before the second pass
read_tram = 11 ,             // second pass: latch T RAM word
read_crom2 = 12 ,            // second pass: latch coefficient word
multiply_add3 = 13 ,         // second pass: accumulate into temp29
wait4_mult_incrk = 14 ,      // second pass: advance k, write O RAM when k==3
wait4_unload_start = 15 ,    // wait for start before unloading
wait4_unload = 16 ;          // stream the O RAM out on dout, done high
 
 
// Next-state logic of the main sequencer (combinational).
//
// Sequence, as encoded below:
//   main_idle --start--> load_x_ram -> wrt_x_ram_1 <-> wrt_x_ram,
//     leaving for transpose when j==3 and i==7.
//   First pass:  read_crom -> read_xram1 -> multiply_add1 -> read_xram2 ->
//     multiply_add2 -> wait4_trans_incrk, repeated until k==3, j==3, i==7.
//   Second pass: multiply -> read_tram -> read_crom2 -> multiply_add3 ->
//     wait4_mult_incrk, repeated until k==3, j2==7, i==7.
//   Unload: wait4_unload_start waits for start; wait4_unload runs until
//     i==7 and j2==7, then returns to main_idle.
//
// The state register is 5 bits wide but only 18 encodings are defined.
// The default branch sends every undefined (or unknown) encoding back to
// main_idle so that main_next_state is assigned on every path: no latch is
// inferred and the machine recovers from an illegal state.
//
// Non-blocking assignments are kept as in the original so that simulation
// event ordering relative to the state register is unchanged.
always @(start or main_state or i or j or k or j2)
begin
  case (main_state)
    main_idle :
      if (start)
        main_next_state <= load_x_ram ;
      else
        main_next_state <= main_idle ;

    // ---- load the X RAM -------------------------------------------
    load_x_ram  : main_next_state <= wrt_x_ram_1 ;
    wrt_x_ram_1 : main_next_state <= wrt_x_ram ;
    wrt_x_ram :
      if ((j == 3) && (i == 7))
        main_next_state <= transpose ;
      else
        main_next_state <= wrt_x_ram_1 ;

    // ---- first multiplication (results go to the T RAM) -----------
    transpose     : main_next_state <= read_crom ;
    read_crom     : main_next_state <= read_xram1 ;
    read_xram1    : main_next_state <= multiply_add1 ;
    multiply_add1 : main_next_state <= read_xram2 ;
    read_xram2    : main_next_state <= multiply_add2 ;
    multiply_add2 : main_next_state <= wait4_trans_incrk ;
    wait4_trans_incrk :
      if ((k == 3) && (j == 3) && (i == 7))
        main_next_state <= multiply ;
      else
        main_next_state <= read_crom ;

    // ---- second multiplication (results go to the O RAM) ----------
    multiply      : main_next_state <= read_tram ;
    read_tram     : main_next_state <= read_crom2 ;
    read_crom2    : main_next_state <= multiply_add3 ;
    multiply_add3 : main_next_state <= wait4_mult_incrk ;
    wait4_mult_incrk :
      if ((k == 3) && (j2 == 7) && (i == 7))
        main_next_state <= wait4_unload_start ;
      else
        main_next_state <= read_tram ;

    // ---- unload ---------------------------------------------------
    wait4_unload_start :
      if (start)
        main_next_state <= wait4_unload ;
      else
        main_next_state <= wait4_unload_start ;
    wait4_unload :
      if ((i == 7) && (j2 == 7))
        main_next_state <= main_idle ;
      else
        main_next_state <= wait4_unload ;

    // ---- undefined encodings (18..31) and unknown state -----------
    default : main_next_state <= main_idle ;
  endcase
end
 
// State register: on each rising clock edge take the computed next state,
// or main_idle while the synchronous, active-high reset is asserted.
always @(posedge clk)
  if (reset == 1'b1)
    main_state <= main_idle;
  else
    main_state <= main_next_state;
//-------------------------------------------------------------------
// i counter - index of the outermost loop in every phase.
// Cleared on reset and in the state that precedes each phase; incremented
// when the inner index (or indices) of the current phase is at its last
// value. Holds in every other state.
always @(posedge clk)
begin
  if (reset)
    i <= 3'b0;
  else
    case (main_state)
      // initialise before counting
      load_x_ram,
      transpose,
      multiply,
      wait4_unload_start :
        i <= 3'b0;

      // load phase: j is the only inner index
      wrt_x_ram :
        if (j == 3) i <= i + 1;

      // first pass: inner indices j and k
      wait4_trans_incrk :
        if ((j == 3) && (k == 3)) i <= i + 1;

      // second pass: inner indices j2 and k
      wait4_mult_incrk :
        if ((j2 == 7) && (k == 3)) i <= i + 1;

      // unload: j2 is the only inner index
      wait4_unload :
        if (j2 == 7) i <= i + 1;

      default : ; // hold
    endcase
end
//-------------------------------------------------------------------
// j counter - middle loop index of the load phase and of the first pass.
// Cleared on reset and before each of those phases. During loading it
// advances on every wrt_x_ram state; during the first pass it advances only
// once the inner k loop has reached its last value.
always @(posedge clk)
begin
  if (reset)
    j <= 2'b0;
  else
    case (main_state)
      load_x_ram,
      transpose :
        j <= 2'b0;

      wrt_x_ram :
        j <= j + 1;

      wait4_trans_incrk :
        if (k == 3) j <= j + 1;

      default : ; // hold
    endcase
end
 
 
//-------------------------------------------------------------------
// j2 counter - middle loop index of the second pass and of the unload.
// Cleared on reset and before each of those phases. During the second pass
// it advances once the inner k loop has reached its last value; during
// unload it advances on every clock.
always @(posedge clk)
begin
  if (reset)
    j2 <= 3'b0;
  else
    case (main_state)
      // initialise before counting
      multiply,
      wait4_unload_start :
        j2 <= 3'b0;

      wait4_mult_incrk :
        if (k == 3) j2 <= j2 + 1;

      wait4_unload :
        j2 <= j2 + 1;

      default : ; // hold
    endcase
end
 
//---------------------------------------------------------------------
 
// k counter - innermost loop index of both multiplication passes.
// Cleared on reset and in the state preceding each phase; advances once per
// multiply-accumulate iteration (in the two wait4_*_incrk states) and wraps
// from 3 back to 0 because it is two bits wide.
always @(posedge clk)
begin
  if (reset)
    k <= 2'b0;
  else
    case (main_state)
      // initialise before counting
      load_x_ram,
      transpose,
      multiply :
        k <= 2'b0;

      wait4_trans_incrk,
      wait4_mult_incrk :
        k <= k + 1;

      default : ; // hold
    endcase
end
 
//-------------------------------------------------------------------
// Memory address generation.
//
// X RAM: the two first-pass reads fetch words k of rows 2*j and 2*j+1
//        (four words per row); in every other state the address is 4*i+j,
//        which is the write address used while loading.
assign x_addr = (main_state == read_xram1) ? (8*j + k)
              : (main_state == read_xram2) ? (8*j + 4 + k)
              :                              (4*i + j);

// T RAM: read at 4*j2+k during the second pass, otherwise 4*i+j
//        (the first-pass write address).
assign t_addr = (main_state == read_tram) ? (4*j2 + k)
              :                             (4*i + j);

// O RAM: eight words per i, indexed by j2, for both writing and unloading.
assign o_addr = 8*i + j2;

// C ROM: always addressed by (i, k). An earlier variant that switched to
// 4*i+j outside the read_crom / read_crom2 states was disabled upstream.
assign c_addr = 4*i + k;
 
//___________________________________________________________________
 
// X RAM write path.
// temp12 is din delayed by one clock, so each 24-bit word written carries
// the previous sample in [23:12] and the current sample in [11:0].
always @(posedge clk)
begin
  temp12 <= din;
end

assign x_we       = (main_state == wrt_x_ram_1);
assign x_data_in  = {temp12, din};
// Drive the shared bus only while writing; otherwise release it.
assign x_data     = x_we ? x_data_in : {24{1'bz}};
assign x_data_out = x_data;
//___________________________________________________________________
// Operand registers for the multiplications.

// temp26: coefficient word, captured in either coefficient-read state
// (first or second pass). Cleared by the synchronous reset.
always @(posedge clk)
begin
  if (reset)
    temp26 <= 0;
  else if ((main_state == read_crom) || (main_state == read_crom2))
    temp26 <= c_data;
end

// temp24: X RAM word, captured in both X RAM read states (the address
// differs between the two states, see x_addr). This register has no reset.
always @(posedge clk)
begin
  if ((main_state == read_xram1) || (main_state == read_xram2))
    temp24 <= x_data_out;
end
// temp25a: first-pass accumulator fed in multiply_add1.
// Each step adds two 13x12 signed products: upper coefficient times upper
// sample and lower coefficient times lower sample. The accumulator is
// cleared by reset and at the end of each k loop (the same cycle in which
// its value is written to the T RAM).
always @(posedge clk)
begin
  if (reset)
    temp25a <= 0;
  else
    case (main_state)
      wait4_trans_incrk :
        if (k == 3) temp25a <= 0;

      multiply_add1 :
        temp25a <= temp25a
                 + mult13_12(temp26[25:13], temp24[23:12])
                 + mult13_12(temp26[12:0],  temp24[11:0]);

      default : ; // hold
    endcase
end
// temp25b: first-pass accumulator fed in multiply_add2, i.e. after temp24
// has been reloaded in read_xram2. Same arithmetic and clearing rule as
// temp25a: two 13x12 signed products per step, cleared by reset and at the
// end of each k loop.
always @(posedge clk)
begin
  if (reset)
    temp25b <= 0;
  else
    case (main_state)
      wait4_trans_incrk :
        if (k == 3) temp25b <= 0;

      multiply_add2 :
        temp25b <= temp25b
                 + mult13_12(temp26[25:13], temp24[23:12])
                 + mult13_12(temp26[12:0],  temp24[11:0]);

      default : ; // hold
    endcase
end
 
//debug
//initial
//$monitor ("temp26=%x,temp24=%x,temp25a=%x,temp25b=%x,i=%x,j=%x,k=%x",temp26,temp24,temp25a,temp25b,i,j,k);
 
//______________________________________________________________________
// T RAM write path.
// Bits [23:8] of each first-pass accumulator are packed into one 32-bit
// word, which is written in the last k iteration of wait4_trans_incrk.
assign t_we       = (k == 3) && (main_state == wait4_trans_incrk);
assign t_data_in  = {temp25a[23:8], temp25b[23:8]};
// Drive the shared bus only while writing; otherwise release it.
assign t_data     = t_we ? t_data_in : {32{1'bz}};
assign t_data_out = t_data;
//____________________________________________________________________
 
//second multiplication and transpose
 
// temp32: T RAM word captured in read_tram for the second multiplication.
// Cleared by the synchronous reset; holds in every other state.
always @(posedge clk)
begin
  if (reset)
    temp32 <= 0;
  else if (main_state == read_tram)
    temp32 <= t_data_out;
end
/*//reading the crom again for second time
always @ (posedge clk)
if (main_state==read_crom2)
temp26<= c_data;
else
temp26<= temp26;*/
// temp29: second-pass accumulator.
// Cleared in the 'multiply' state (before the pass starts) and at the end
// of each k loop; in multiply_add3 it adds two 13x16 products: upper
// coefficient times upper T word half, lower coefficient times lower half.
// This register has no reset term.
always @(posedge clk)
begin
  case (main_state)
    multiply :
      temp29 <= 0;

    wait4_mult_incrk :
      if (k == 3) temp29 <= 0;

    multiply_add3 :
      temp29 <= temp29
              + mult13_16(temp26[25:13], temp32[31:16])
              + mult13_16(temp26[12:0],  temp32[15:0]);

    default : ; // hold
  endcase
end
// O RAM write path.
// Bits [26:18] of the second-pass accumulator form the 9-bit result, which
// is written in the last k iteration of wait4_mult_incrk.
assign o_we       = (main_state == wait4_mult_incrk) && (k == 3);
assign o_data_in  = temp29[26:18];
// Drive the shared bus only while writing; otherwise release it.
assign o_data     = o_we ? o_data_in : {9{1'bz}};
assign o_data_out = o_data;
 
//____________________________________________________________________________
// Result output register: while unloading, capture the word currently on
// the O RAM data bus; otherwise keep the last value. No reset term.
always @(posedge clk)
begin
  if (main_state == wait4_unload)
    dout <= o_data;
end
 
//________________________________________________________________________
 
// 'done' is high for exactly the cycles spent in the unload state.
assign done = (main_state == wait4_unload);
//________________________________________________________________________

endmodule