lang/python/ SHA256


Here is the source. The first file implements the algorithm and, following it, are the sources for consts.py and shifts.py.

import struct

from consts import I,K
from shifts import right_rotate, right_shift

wordmask = 0xffffffff # for taking remainder modulo 2**32-1

def word_array_to_hex(words):
  t = ""
  for word in words:
    t += f"{word:08x}"
  return t

class sha:
  block_size_bytes = 512 // 8
  def __init__(self):
    self.H = [x for x in I]
    self.data = bytearray()
    self.length_in_bits = 0

  def update(self,data):
    # convert data to utf8 if needed
    if isinstance(data,str):
      data = data.encode("utf8")

    # append data to self.data
    # if len(self.data) >= 512, process blocks and consume until
    # len(self.data) < 512
    self.length_in_bits += len(data) * 8
    self.data += data
    while len(self.data) > sha.block_size_bytes:
      self._process_block()

  def finalize(self,data):
    self.update(data) # append data and process any blocks as needed
    # pad with 0x80 0x00 .... 0x00 L where L is 8 bytes and is the length of the data in bits
    # at least 9 bytes are added. so we need to take len(self.data) + 9 and round to next block_size_bytes
    # so len(self.data) + 9 + x = 0 (mod 512//8)
    # we know that len(self.data) < 512
    # so x = (1024//8 - 9 - len(self.data)) % 512//8
    # self.data += bytearray
    num_zero_bytes = (2*sha.block_size_bytes - 9 - len(self.data)) % sha.block_size_bytes
    len_be = struct.pack(">Q",self.length_in_bits)
    pad = b'\x80' + (b'\x00' * num_zero_bytes) + len_be
    self.data += pad
    try:
      assert(len(self.data)%64 == 0)
    except AssertionError:
      print("Assert error 1")
      print(num_zero_bytes,len_be,len(self.data),len(self.data)%64)
      raise
    self._process_block()
    return self.H

  def _process_block(self):
    block = self.data[:sha.block_size_bytes] # remove one block from start of data
    print(block)
    self.data = self.data[sha.block_size_bytes:]
    w = list(struct.unpack(">"+("L"*16),block)) + [0]*48
    #print("block",w[:16])
    #print("array",w)
    #exit()
    for i in range(16,64):
      s0 = right_rotate(w[i-15], 7) ^ right_rotate(w[i-15], 18) ^ right_shift(w[i-15],3)
      s1 = right_rotate(w[i-2], 17) ^ right_rotate(w[i-2], 19)  ^ right_shift(w[i-2],10)
      w[i] = (w[i-16] + s0 + w[i-7] + s1) & wordmask

    a, b, c, d, e, f, g, h = self.H

    for i in range(64):
      s1 = right_rotate(e,6) ^ right_rotate(e,11) ^ right_rotate(e,25)
      ch = (e & f) ^ ((~e) & g)
      temp1 = (h + s1 + ch + K[i] + w[i]) & wordmask
      s0 = right_rotate(a,2) ^ right_rotate(a,13) ^ right_rotate(a,22)
      maj = (a & b) ^ (a & c) ^ (b & c)
      temp2 = s0 + maj

      h = g
      g = f
      f = e
      e = d + temp1
      d = c
      c = b
      b = a
      a = temp1 + temp2

    dH = [a,b,c,d,e,f,g,h]
    for i in range(8):
      self.H[i] = (self.H[i] + dH[i]) & wordmask

# simple implementation of sha256

if __name__ == "__main__":
  print(I)
  a = sha()
  t = b"Hello"*1000
  open("inp1","wb").write(t)
  h = a.finalize(t)
  print(h)
  print(word_array_to_hex(h))

This file generates the constants

import math

# generate constants for sha256
def isprime(x):
  for i in range(2,int(1+x**0.5)):
    if x % i == 0:
      return False
  return True
def tofrac(x):
  f = x - math.floor(x)
  g = f * 2**32
  h = int(g)
  return h
def gen_consts():
  a = list(filter(isprime,range(2,312)))[:64]
  b = a[:8]
  I = list(map(lambda t: tofrac(t**(1/2)),b))
  K = list(map(lambda t: tofrac(t**(1/3)),a))
  return (I,K)
I,K = gen_consts()

if __name__ == "__main__":
  print("I")
  for i,x in enumerate(I):
    print(f"{i+1:2d} {x:08x}")
  print("K")
  for i,x in enumerate(K):
    print(f"{i+1:2d} {x:08x}")

The following implements the correct shifting and rotating behaviour

wordmask = 0xffffffff # for taking remainder modulo 2**32-1

def right_rotate(x,n):
  "rotate x n bits to the right"
  n %= 32
  x &= wordmask
  l = x << (32 - n) & wordmask
  r = x >> n
  return l | r

def right_shift(x,n):
  "shift x n bits to the right"
  x &= wordmask
  return x >> n

def test_f(t,f,x,n):
  o = f(x,n)
  print(f"{x:08x} => {o:08x} ({t}: {x} by {n})")

if __name__ == "__main__":
  test_f("right_rotate",right_rotate,5,1)
  test_f("right_rotate",right_rotate,246,3)
  test_f("right_rotate",right_rotate,534,7)
  test_f("right_rotate",right_rotate,52,13)

  test_f("right_shift",right_shift,5,1)
  test_f("right_shift",right_shift,246,3)

The following code generates the correct constants for the Rust implementation I wrote.

#!/usr/bin/env python3
import math

ofn = "src/consts.rsi"

# generate constants for sha256
def isprime(x):
  for i in range(2,int(1+x**0.5)):
    if x % i == 0:
      return False
  return True
def tofrac(x):
  f = x - math.floor(x)
  g = f * 2**32
  h = int(g)
  return h
def gen_consts():
  a = list(filter(isprime,range(2,312)))[:64]
  b = a[:8]
  I = list(map(lambda t: tofrac(t**(1/2)),b))
  K = list(map(lambda t: tofrac(t**(1/3)),a))
  return (I,K)
I,K = gen_consts()

def print_consts(ty,name,vals):
  n = len(vals)
  print(f"const {name} : [{ty} ; {n}] = [")
  for i,val in enumerate(vals):
    if i<len(vals)-1:
      print(f"  0x{val:08x},",end="")
      if i%4 == (4-1):
        print()
    else:
      print(f"  0x{val:08x}")
  print("];")

if __name__ == "__main__":
  print("mod sha256_consts {")
  print_consts("u32","I",I)
  print_consts("u32","K",K)   
  print("}")