section .bss
; ---- input side ----
inbuf resb 4000000      ; raw bytes read from stdin by main; parsed by read_int
inpos resd 1            ; offset into inbuf of the next byte read_int will look at
inlen resd 1            ; result of the read syscall as stored by main
; ---- output side ----
outbuf resb 4000000     ; text assembled by write_int
outpos resd 1           ; number of bytes currently held in outbuf
; ---- matrix bookkeeping (all maintained by main) ----
cur_ptr resd 1          ; heap buffer for the matrix currently being read
best_ptr resd 1         ; heap buffer for the best matrix so far (0 = none)
cur_cap resd 1          ; capacity of the cur_ptr buffer in bytes
best_sz resd 1          ; size value recorded together with the best matrix
best_hi resd 1          ; high dword of the best trace (64-bit value)
best_lo resd 1          ; low dword of the best trace
section .text
extern malloc, realloc, free
global main
; -------- fast read int --------
;-----------------------------------------------------------------------
; read_int: parse the next signed decimal integer from inbuf.
; In:    [inpos] = offset of the next unread byte
;        [inlen] = number of valid bytes in inbuf (<= 0 means "no data")
; Out:   eax     = parsed value; 0 if the input ends before any digit.
;                  Values outside 32 bits wrap silently.
;        [inpos] = offset just past the last digit consumed
; Clobb: eax, flags.  ebx, ecx, edx, esi, edi are preserved.
;
; Every non-digit byte is treated as a separator. A '-' counts as a
; sign only when it is immediately followed by the first digit.
;-----------------------------------------------------------------------
read_int:
        push    ebx
        push    ecx
        push    esi
        mov     esi, [inpos]
        xor     eax, eax                ; result so far
        xor     ecx, ecx                ; 1 = a '-' directly precedes the number

.skip:                                  ; hunt for the first digit
        cmp     esi, [inlen]
        jge     .done                   ; out of input: return 0
        movzx   ebx, byte [inbuf + esi] ; zero-extend so all of ebx is defined
        inc     esi
        cmp     ebx, '-'
        je      .minus
        sub     ebx, '0'
        cmp     ebx, 9                  ; unsigned: anything outside 0..9 is > 9
        jbe     .first_digit
        xor     ecx, ecx                ; separator: forget a dangling '-'
        jmp     .skip

.minus:
        mov     ecx, 1
        jmp     .skip

.first_digit:
        mov     eax, ebx

.digits:                                ; esi points at the next candidate byte
        cmp     esi, [inlen]
        jge     .done
        movzx   ebx, byte [inbuf + esi]
        sub     ebx, '0'
        cmp     ebx, 9
        ja      .done                   ; leave the terminator unread
        imul    eax, eax, 10
        add     eax, ebx
        inc     esi
        jmp     .digits

.done:
        mov     [inpos], esi
        test    ecx, ecx
        jz      .ret
        neg     eax
.ret:
        pop     esi
        pop     ecx
        pop     ebx
        ret
; -------- write int --------
;-----------------------------------------------------------------------
; write_int: append the decimal text of a signed 32-bit value, followed
;            by one space, to outbuf.
; In:    eax      = value to print
;        [outpos] = current length of outbuf
; Out:   [outpos] = advanced past the text and the trailing space
;        eax      = 0 (the final quotient)
; Clobb: eax, flags.  ebx, ecx, edx, esi are preserved.
;
; No bounds check is made against the size of outbuf.
;-----------------------------------------------------------------------
write_int:
        push    ebx
        push    ecx
        push    edx
        push    esi

        mov     esi, [outpos]           ; esi = write offset
        mov     ebx, 10                 ; constant divisor
        xor     ecx, ecx                ; digits waiting on the stack

        test    eax, eax
        jns     .split
        neg     eax                     ; magnitude; INT_MIN stays 0x80000000,
                                        ; which the unsigned div handles
        mov     byte [outbuf + esi], '-'
        inc     esi

.split:                                 ; peel digits, least significant first
        xor     edx, edx
        div     ebx                     ; eax = quotient, edx = digit
        push    edx
        inc     ecx
        test    eax, eax
        jnz     .split

.emit:                                  ; pop them back, most significant first
        pop     edx
        add     dl, '0'
        mov     [outbuf + esi], dl
        inc     esi
        dec     ecx
        jnz     .emit

        mov     byte [outbuf + esi], ' '
        inc     esi
        mov     [outpos], esi

        pop     esi
        pop     edx
        pop     ecx
        pop     ebx
        ret
; -------- main --------
;-----------------------------------------------------------------------
; main: read N square matrices from stdin, keep the one with the largest
;       trace (the first one wins ties), print it, and exit.
;
; Input : whitespace-separated integers:  N, then for each matrix its
;         dimension n followed by n*n elements in row-major order.
; Output: the best matrix, one row per line; every value is followed by
;         a space (that is what write_int emits).
;         NOTE(review): the original left output unimplemented, so this
;         layout is a choice made here - adjust if a format is required.
; Exit  : status 0 normally; status 1 on a dimension outside
;         0..MAX_DIM or on allocation failure.  Never returns.
;
; ABI   : i386 cdecl for the libc call; esp is 16-byte aligned at it.
; Regs  : esi = column index / scratch, edi = element pointer.
;         ebx is only ever loaded with syscall arguments.
;         Nothing is kept in eax/ecx/edx across a call.
;-----------------------------------------------------------------------
%define SYS_EXIT        1
%define SYS_READ        3
%define SYS_WRITE       4
%define IN_CAPACITY     4000000         ; must match the size reserved for inbuf
%define OUT_CAPACITY    4000000         ; must match the size reserved for outbuf
; IN_CAPACITY bytes hold at most IN_CAPACITY/2 numbers (one digit plus
; one separator each), so a complete matrix has n*n <= 2000000.
; The bound also keeps n*n*4 far away from 32-bit overflow.
%define MAX_DIM         1414
%define OUT_ROW_MAX     (MAX_DIM * 12 + 1)  ; sign + 10 digits + space each, + newline

; ebp-relative locals
%define loc_remaining   ebp - 4         ; matrices still to read
%define loc_dim         ebp - 8         ; n of the current matrix
%define loc_row         ebp - 12        ; current row index
%define loc_tr_lo       ebp - 16        ; running trace, low dword
%define loc_tr_hi       ebp - 20        ; running trace, high dword
%define loc_best_cap    ebp - 24        ; byte capacity of the best_ptr buffer
%define loc_have_best   ebp - 28        ; 0 until a first matrix is recorded
%define FRAME_BYTES     32

main:
        push    ebp
        mov     ebp, esp
        sub     esp, FRAME_BYTES
        and     esp, -16                ; align after reserving the locals

        xor     eax, eax
        mov     [inpos], eax
        mov     [outpos], eax
        mov     [best_ptr], eax
        mov     [loc_best_cap], eax
        mov     [loc_have_best], eax

        ; ---- slurp stdin: read() may return short, so loop to EOF ----
        xor     esi, esi                ; esi = bytes read so far
.fill:
        mov     edx, IN_CAPACITY
        sub     edx, esi
        jz      .filled                 ; buffer full
        mov     eax, SYS_READ
        xor     ebx, ebx                ; fd 0
        lea     ecx, [inbuf + esi]
        int     0x80
        test    eax, eax
        jle     .filled                 ; 0 = EOF, negative = -errno
        add     esi, eax
        jmp     .fill
.filled:
        mov     [inlen], esi

        call    read_int
        mov     [loc_remaining], eax    ; N

.next_matrix:
        cmp     dword [loc_remaining], 0
        jle     .output                 ; also stops on a negative N
        mov     eax, [inpos]
        cmp     eax, [inlen]
        jge     .output                 ; input exhausted early
        dec     dword [loc_remaining]

        call    read_int                ; n
        cmp     eax, MAX_DIM
        ja      .fail                   ; unsigned: rejects negatives too
        mov     [loc_dim], eax

        ; ---- make sure the working buffer holds n*n dwords ----
        imul    eax, eax
        shl     eax, 2                  ; bytes needed
        cmp     eax, [cur_cap]
        jbe     .have_room
        mov     esi, eax                ; callee-saved copy of the size
        sub     esp, 8                  ; 8 + two pushes = 16, keeps alignment
        push    esi
        push    dword [cur_ptr]         ; NULL the first time: acts as malloc
        call    realloc
        add     esp, 16
        test    eax, eax
        jz      .fail
        mov     [cur_ptr], eax
        mov     [cur_cap], esi          ; commit capacity only on success
.have_room:

        ; ---- read the elements, accumulating the 64-bit trace ----
        mov     edi, [cur_ptr]          ; edi walks the buffer row-major
        xor     eax, eax
        mov     [loc_tr_lo], eax
        mov     [loc_tr_hi], eax
        mov     [loc_row], eax
.read_row:
        mov     eax, [loc_row]
        cmp     eax, [loc_dim]
        jge     .compare
        xor     esi, esi                ; esi = column
.read_col:
        cmp     esi, [loc_dim]
        jge     .row_done
        call    read_int
        mov     [edi], eax
        add     edi, 4
        cmp     esi, [loc_row]
        jne     .off_diagonal
        cdq                             ; edx:eax = sign-extended element
        add     [loc_tr_lo], eax
        adc     [loc_tr_hi], edx
.off_diagonal:
        inc     esi
        jmp     .read_col
.row_done:
        inc     dword [loc_row]
        jmp     .read_row

        ; ---- keep it only if its trace is strictly larger ----
.compare:
        cmp     dword [loc_have_best], 0
        je      .save
        mov     eax, [loc_tr_hi]
        cmp     eax, [best_hi]          ; high dwords compare signed
        jg      .save
        jl      .next_matrix
        mov     eax, [loc_tr_lo]
        cmp     eax, [best_lo]          ; low dwords compare unsigned
        ja      .save
        jmp     .next_matrix

.save:
        ; Swap the two buffers instead of copying: the old best buffer
        ; becomes the working buffer for the next matrix, so nothing
        ; is leaked and nothing is copied.
        mov     eax, [cur_ptr]
        mov     edx, [best_ptr]
        mov     [best_ptr], eax
        mov     [cur_ptr], edx
        mov     eax, [cur_cap]
        mov     edx, [loc_best_cap]
        mov     [loc_best_cap], eax
        mov     [cur_cap], edx

        mov     eax, [loc_tr_hi]
        mov     [best_hi], eax
        mov     eax, [loc_tr_lo]
        mov     [best_lo], eax
        mov     eax, [loc_dim]
        mov     [best_sz], eax          ; best_sz = dimension n
        mov     dword [loc_have_best], 1
        jmp     .next_matrix

        ; ---- print the best matrix ----
.output:
        cmp     dword [loc_have_best], 0
        je      .exit                   ; no matrix was read
        mov     edi, [best_ptr]
        mov     dword [loc_row], 0
.out_row:
        mov     eax, [loc_row]
        cmp     eax, [best_sz]
        jge     .exit
        cmp     dword [outpos], OUT_CAPACITY - OUT_ROW_MAX
        jbe     .row_fits
        call    .flush_out              ; make room for one whole row
.row_fits:
        xor     esi, esi
.out_col:
        cmp     esi, [best_sz]
        jge     .out_eol
        mov     eax, [edi]
        add     edi, 4
        call    write_int               ; preserves esi; edi is untouched
        inc     esi
        jmp     .out_col
.out_eol:
        mov     eax, [outpos]
        mov     byte [outbuf + eax], 10 ; '\n'
        inc     eax
        mov     [outpos], eax
        inc     dword [loc_row]
        jmp     .out_row

.exit:
        call    .flush_out
        mov     eax, SYS_EXIT
        xor     ebx, ebx                ; status 0
        int     0x80

.fail:
        mov     eax, SYS_EXIT
        mov     ebx, 1                  ; status 1
        int     0x80

; Local subroutine: write outbuf[0 .. outpos) to stdout, retrying on
; short writes, then reset outpos to 0.  Gives up silently on error.
; Clobb: eax, ecx, edx, flags.  ebx, esi preserved.
.flush_out:
        push    ebx
        push    esi
        xor     esi, esi                ; esi = bytes already written
.flush_more:
        mov     edx, [outpos]
        sub     edx, esi
        jle     .flush_done
        mov     eax, SYS_WRITE
        mov     ebx, 1                  ; fd 1
        lea     ecx, [outbuf + esi]
        int     0x80
        test    eax, eax
        jle     .flush_done
        add     esi, eax
        jmp     .flush_more
.flush_done:
        mov     dword [outpos], 0
        pop     esi
        pop     ebx
        ret