#!/usr/bin/env python3

import os
import sys

# Positional arguments: operation, primitive, the host string that priority
# records are compared against, and the trim setting.
o,p,host,trim = sys.argv[1:5]

# Each stdin line is a '/'-separated path.  Lines whose first two components
# are o and p are kept; the components after those two are stored.
impls = []
for rawline in sys.stdin:
  parts = rawline.strip().split('/')
  if parts[0] != o or parts[1] != p:
    continue
  impls.append(parts[2:])

print('operation %s' % o)
print('primitive %s' % p)

def _readstripped(path):
  # Whole contents of the file at path, minus surrounding whitespace.
  with open(path) as handle:
    return handle.read().strip()

# For every (implementation, compiler) entry, remember the contents of
# compilerarch/<compiler> and compilerversion/<compiler>.
icarch = {}
iccompiler = {}
for impl,compiler in impls:
  icarch[impl,compiler] = _readstripped('compilerarch/%s' % compiler)
  iccompiler[impl,compiler] = _readstripped('compilerversion/%s' % compiler)

def archkey(a):
  """Sort key for architecture strings.

  Strings containing more '+' characters sort earlier, ties are broken by
  the string itself, and 'default' sorts after everything else.
  """
  rank = 1 if a == 'default' else -a.count('+')
  return rank,a

# Distinct implementation names, in sorted order.
allimpls = sorted({impl for impl,_compiler in impls})

# Distinct lines of the 'allarches' file, ordered by archkey.
with open('allarches') as archfile:
  allarches = sorted(set(archfile.read().splitlines()),key=archkey)

# Priority records, one tuple per usable line of priority/<o>-<p>-<impl>.
# A usable line has at least seven whitespace-separated fields:
#   prio score host cpuid version machine compiler...
# The version field is dropped; everything from the seventh field on is
# rejoined with single spaces as the compiler string.
prioritydata = []
for impl in allimpls:
  priorityfn = 'priority/%s-%s-%s' % (o,p,impl)
  if os.path.exists(priorityfn):
    with open(priorityfn) as records:
      for record in records:
        fields = record.split()
        if len(fields) >= 7:
          prio,score,priohost,cpuid,_version,machine = fields[:6]
          compiler = ' '.join(fields[6:])
          prioritydata.append((impl,float(prio),score,priohost,cpuid,machine,compiler))

def asupportsic(a,i,c):
  """Report whether architecture a covers everything icarch[i,c] lists.

  Both a and icarch[i,c] are '+'-separated strings.  The first component of
  each is ignored; the result is True when every remaining component of
  icarch[i,c] also appears among the remaining components of a.
  """
  available = a.split('+')[1:]
  required = icarch[i,c].split('+')[1:]
  for feature in required:
    if feature not in available:
      return False
  return True

def cpuidsupports(cpuid,a):
  """Decide from a cpuid dump whether architecture a is supported.

  cpuid is a hex string read as 32 consecutive 8-digit words.  a is a
  '+'-separated architecture string whose first component is ignored.

  Returns True when every remaining component of a passes its checks (which
  is trivially the case when there are none), and False otherwise.

  Raises ValueError when a component is reached that is not recognized, or
  when cpuid cannot be parsed as 32 hex words.
  """
  parts = a.split('+')
  words = [int('0x'+cpuid[8*j:8*j+8],16) for j in range(32)]

  def flag(word,bit):
    # Nonzero when the given bit of the given cpuid word is set.
    return words[word] & (1<<bit)

  # (word index, bit number) of the flag each recognized component requires
  featurebit = {
    'sse3': (17,0),
    'ssse3': (17,9),
    'sse41': (17,19),
    'sse42': (17,20),
    'popcnt': (17,23),
    'avx': (17,28),
    'bmi1': (20,3),
    'avx2': (20,5),
    'bmi2': (20,8),
    'avx512f': (20,16),
    'avx512dq': (20,17),
    'adx': (20,19),
    'avx512ifma': (20,21),
    'avx512vl': (20,31),
    'waitpkg': (21,5),
    'vaes': (21,9),
    'sse4a': (25,6),
  }

  # mmx, sse and sse2 flags: demanded as soon as any component is requested
  baseline = flag(18,23) and flag(18,25) and flag(18,26)
  # osxsave, xmmsaved and ymmsaved flags: demanded by every 'avx*' component
  avxstate = flag(17,27) and flag(27,1) and flag(27,2)

  for apart in parts[1:]:
    if apart not in featurebit:
      raise ValueError('cpuidsupports does not understand %s' % apart)
    if not baseline:
      return False
    if not flag(*featurebit[apart]):
      return False
    if apart.startswith('avx') and not avxstate:
      return False
  return True

def selectic(a,aexclude):
  """Choose the (implementation, compiler) pair to use for architecture a.

  a is an architecture string; aexclude is a collection of architecture
  strings, and priority records from machines supporting any of them count
  as belonging to "other" architectures.

  Candidates are the entries of impls accepted by asupportsic.  Each is
  scored by a weighted average of the prio values of the priority records
  for its implementation, and the lowest average wins.  When at least one
  record comes from this host, supports a, and supports nothing in aexclude,
  only such records are used; otherwise every record contributes.
  Candidates with no contributing record are not ranked; if nothing is
  ranked, the first candidate is returned.

  Prints 'note:' lines describing the decision and returns the chosen
  (i,c) pair.
  """
  if not aexclude:
    print('note: considering machines supporting %s' % a)
  else:
    print('note: considering other machines supporting %s' % a)

  # requirement: icarch[i,c] is a subset of a
  candidates = []
  for i,c in impls:
    if asupportsic(a,i,c):
      candidates.append((i,c))
  assert len(candidates) > 0

  def excluded(cpuid):
    # True when the machine also supports some architecture in aexclude.
    return any(cpuidsupports(cpuid,b) for b in aexclude)

  def direct(priohost,cpuid):
    # True for a record from this host that supports a and nothing excluded.
    return priohost == host and cpuidsupports(cpuid,a) and not excluded(cpuid)

  # desideratum: good performance based on prioritydata

  directmatches = any(
    direct(priohost,cpuid)
    for _i,_prio,_score,priohost,cpuid,_machine,_c in prioritydata
    )

  if not directmatches:
    print('note: no direct matches, so extrapolating from all machines')

  totalprio = dict.fromkeys(candidates,0)
  totalweight = dict.fromkeys(candidates,0)

  for prioi,prio,score,priohost,cpuid,machine,prioc in prioritydata:
    samehost = priohost == host

    if directmatches:
      # restrict to the direct matches themselves
      if not samehost: continue
      if excluded(cpuid): continue
      if not cpuidsupports(cpuid,a): continue

    for key in candidates:
      i,c = key
      if i != prioi: continue

      # XXX: use more serious machine learning here
      # Every respect in which the record resembles the target multiplies
      # its weight: same host, supports a, supports nothing excluded,
      # length of the shared compiler-string prefix, identical compiler.
      weight = 1.0
      if samehost: weight *= 10
      if cpuidsupports(cpuid,a): weight *= 10
      if not excluded(cpuid): weight *= 10
      compiler = iccompiler[i,c]
      weight *= 1+len(os.path.commonprefix([compiler,prioc]))
      if compiler == prioc: weight *= 10

      # print('note: weight %s from %s %s %s %s for %s %s' % (weight,prio,machine,prioi,prioc,i,c))

      totalprio[key] += prio*weight
      totalweight[key] += weight

  # note that implementations without priority data are excluded from ranking
  ranking = sorted(
    (totalprio[key]/totalweight[key],)+key
    for key in candidates
    if totalweight[key] > 0
    )

  for prio,i,c in ranking:
    print('note: priority %s for %s %s' % (prio,i,c))

  if not ranking:
    return candidates[0]
  return ranking[0][1:]

# Walk the architectures in allarches order and pick one (implementation,
# compiler) pair for each.  The architectures already processed are handed
# to selectic as the set to exclude.
usedimpls = set()
handledarches = set()
for arch in allarches:
  impl,compiler = selectic(arch,handledarches)
  usedimpls.add((impl,compiler))
  print('selected %s %s %s' % (arch,impl,compiler))
  handledarches.add(arch)

# Report the implementations to keep: only the selected ones, unless trim is
# the string 'False', in which case every entry of impls is reported.
keepall = trim == 'False'
for impl,compiler in impls:
  if keepall or (impl,compiler) in usedimpls:
    print('impl %s %s' % (impl,compiler))