lang/python/ SHA256


Here is the source for a Python implementation of SHA256. The first file implements the algorithm and, following it, are the sources for consts.py and shifts.py. See also the rust and C versions. I would not advise trying to write one in pure Haskell.

import struct

from consts import I,K
from shifts import right_rotate, right_shift

wordmask = 0xffffffff # for taking remainder modulo 2**32-1

def word_array_to_hex(words):
  t = ""
  for word in words:
    t += f"{word:08x}"
  return t

class sha:
  block_size_bytes = 512 // 8
  def __init__(self):
    self.H = [x for x in I]
    self.data = bytearray()
    self.length_in_bits = 0

  def update(self,data):
    # convert data to utf8 if needed
    if isinstance(data,str):
      data = data.encode("utf8")

    # append data to self.data
    # if len(self.data) >= 512, process blocks and consume until
    # len(self.data) < 512
    self.length_in_bits += len(data) * 8
    self.data += data
    while len(self.data) > sha.block_size_bytes:
      self._process_block()

  def finalize(self,data):
    self.update(data) # append data and process any blocks as needed
    # pad with 0x80 0x00 .... 0x00 L where L is 8 bytes and is the length of the data in bits
    # at least 9 bytes are added. so we need to take len(self.data) + 9 and round to next block_size_bytes
    # so len(self.data) + 9 + x = 0 (mod 512//8)
    # we know that len(self.data) < 512
    # so x = (1024//8 - 9 - len(self.data)) % 512//8
    # self.data += bytearray
    num_zero_bytes = (2*sha.block_size_bytes - 9 - len(self.data)) % sha.block_size_bytes
    len_be = struct.pack(">Q",self.length_in_bits)
    pad = b'\x80' + (b'\x00' * num_zero_bytes) + len_be
    self.data += pad
    try:
      assert(len(self.data)%64 == 0)
    except AssertionError:
      print("Assert error 1")
      print(num_zero_bytes,len_be,len(self.data),len(self.data)%64)
      raise
    self._process_block()
    return self.H

  def _process_block(self):
    block = self.data[:sha.block_size_bytes] # remove one block from start of data
    print(block)
    self.data = self.data[sha.block_size_bytes:]
    w = list(struct.unpack(">"+("L"*16),block)) + [0]*48
    #print("block",w[:16])
    #print("array",w)
    #exit()
    for i in range(16,64):
      s0 = right_rotate(w[i-15], 7) ^ right_rotate(w[i-15], 18) ^ right_shift(w[i-15],3)
      s1 = right_rotate(w[i-2], 17) ^ right_rotate(w[i-2], 19)  ^ right_shift(w[i-2],10)
      w[i] = (w[i-16] + s0 + w[i-7] + s1) & wordmask

    a, b, c, d, e, f, g, h = self.H

    for i in range(64):
      s1 = right_rotate(e,6) ^ right_rotate(e,11) ^ right_rotate(e,25)
      ch = (e & f) ^ ((~e) & g)
      temp1 = (h + s1 + ch + K[i] + w[i]) & wordmask
      s0 = right_rotate(a,2) ^ right_rotate(a,13) ^ right_rotate(a,22)
      maj = (a & b) ^ (a & c) ^ (b & c)
      temp2 = s0 + maj

      h = g
      g = f
      f = e
      e = d + temp1
      d = c
      c = b
      b = a
      a = temp1 + temp2

    dH = [a,b,c,d,e,f,g,h]
    for i in range(8):
      self.H[i] = (self.H[i] + dH[i]) & wordmask

# simple implementation of sha256

if __name__ == "__main__":
  print(I)
  a = sha()
  t = b"Hello"*1000
  open("inp1","wb").write(t)
  h = a.finalize(t)
  print(h)
  print(word_array_to_hex(h))

This file generates the constants

import math

# generate constants for sha256
def isprime(x):
  for i in range(2,int(1+x**0.5)):
    if x % i == 0:
      return False
  return True
def tofrac(x):
  f = x - math.floor(x)
  g = f * 2**32
  h = int(g)
  return h
def gen_consts():
  a = list(filter(isprime,range(2,312)))[:64]
  b = a[:8]
  I = list(map(lambda t: tofrac(t**(1/2)),b))
  K = list(map(lambda t: tofrac(t**(1/3)),a))
  return (I,K)
I,K = gen_consts()

if __name__ == "__main__":
  print("I")
  for i,x in enumerate(I):
    print(f"{i+1:2d} {x:08x}")
  print("K")
  for i,x in enumerate(K):
    print(f"{i+1:2d} {x:08x}")

The following implements the correct shifting and rotating behaviour

wordmask = 0xffffffff # for taking remainder modulo 2**32-1

def right_rotate(x,n):
  "rotate x n bits to the right"
  n %= 32
  x &= wordmask
  l = x << (32 - n) & wordmask
  r = x >> n
  return l | r

def right_shift(x,n):
  "shift x n bits to the right"
  x &= wordmask
  return x >> n

def test_f(t,f,x,n):
  o = f(x,n)
  print(f"{x:08x} => {o:08x} ({t}: {x} by {n})")

if __name__ == "__main__":
  test_f("right_rotate",right_rotate,5,1)
  test_f("right_rotate",right_rotate,246,3)
  test_f("right_rotate",right_rotate,534,7)
  test_f("right_rotate",right_rotate,52,13)

  test_f("right_shift",right_shift,5,1)
  test_f("right_shift",right_shift,246,3)

The following code generates the correct constants for the Rust implementation I wrote.

#!/usr/bin/env python3
import math

ofn = "src/consts.rsi"

# generate constants for sha256
def isprime(x):
  for i in range(2,int(1+x**0.5)):
    if x % i == 0:
      return False
  return True
def tofrac(x):
  f = x - math.floor(x)
  g = f * 2**32
  h = int(g)
  return h
def gen_consts():
  a = list(filter(isprime,range(2,312)))[:64]
  b = a[:8]
  I = list(map(lambda t: tofrac(t**(1/2)),b))
  K = list(map(lambda t: tofrac(t**(1/3)),a))
  return (I,K)
I,K = gen_consts()

def print_consts(ty,name,vals):
  n = len(vals)
  print(f"const {name} : [{ty} ; {n}] = [")
  for i,val in enumerate(vals):
    if i<len(vals)-1:
      print(f"  0x{val:08x},",end="")
      if i%4 == (4-1):
        print()
    else:
      print(f"  0x{val:08x}")
  print("];")

if __name__ == "__main__":
  print("mod sha256_consts {")
  print_consts("u32","I",I)
  print_consts("u32","K",K)   
  print("}")