# main.py
#
# Non-Deterministic Processor (NDP) - efficient parallel SAT-solver
# Copyright (c) 2023 GridSAT Stiftung
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# GridSAT Stiftung - Georgstr. 11 - 30159 Hannover - Germany - ipfs: gridsat.eth/ - info@gridsat.io
#
import os, sys
import time
import argparse, textwrap
import ray
import logging
# Initialize Ray with the current directory as the working directory
def initialize_ray():
if not ray.is_initialized():
current_dir = os.path.dirname(os.path.abspath(__file__))
# Set Ray's logging level to only show errors
ray.init(runtime_env={"working_dir": current_dir}, logging_level=logging.ERROR)
cluster_resources = ray.cluster_resources()
num_cpus = cluster_resources.get("CPU", 1) # Defaults to 1 if not available
print(f"\n\n\nNDP started.")
return cluster_resources, num_cpus
cluster_resources, num_cpus = initialize_ray()
from copy import deepcopy
from Multiply import Multiply
from Set import *
from Clause import *
from PatternSolver import *
from InputReader import InputReader
import configs
import traceback
from Factorizer import Factorizer
from byebye import bye_art
# todo:
#
# - Handle inputs containing [x, -x]. Currently the clause is normalized as soon as it is read, which means we can no longer
#   view the initial set as provided; we only see the normalized version. The fix is to write a normalize() method in both the
#   Clause and Set classes and call it in the evaluation loop before to_lo_condition(). However, do we need to normalize every
#   set, or just the root set? This needs careful thought, because we don't want to add extra time to the evaluation loop if
#   only the root set needs normalization. The current implementation works fine because it focuses only on the root, but the
#   unnormalized version of the root set is not saved. (See the sketch after this todo list.)
# - enable multiple Ray clusters
# - enable GPU
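#
# A minimal sketch of the normalization idea on raw DIMACS-style integer literals (hypothetical
# helper - the real implementation would belong in the Clause/Set classes):
#
#   def normalize_clause(literals):
#       unique = set(literals)
#       # a clause containing both x and -x, e.g. (x | -x | y), is a tautology and is
#       # always True, so the whole clause can be dropped from the set
#       if any(-lit in unique for lit in unique):
#           return None
#       return sorted(unique, key=abs)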
def display_ascii_art(ascii_art):
print(ascii_art)
# a class to represent the CNF graph
class CnfGraph:
content = None
def __init__(self, content = None):
self.content = content
def print_node(self):
logger.info(self.content)
def Main(args):
# determine input type/format
input_type = None
input_content = None
if args.line_input:
input_type = INPUT_SL
input_content = args.line_input
elif args.line_input_file:
input_type = INPUT_SLF
input_content = args.line_input_file
elif args.dimacs:
input_type = INPUT_DIMACS
input_content = args.dimacs
# Determine the input file name
input_file_name = None
if args.line_input_file or args.dimacs:
input_file_name = os.path.basename(input_content.name) if hasattr(input_content, 'name') else None
# begin logic
CnfSet = None
try:
input_reader = InputReader(input_type, input_content)
CnfSet = input_reader.get_cnf_set()
# Tasks: Factorization
if args.factorize:
fact = Factorizer()
if not fact.preprocess_set(CnfSet):
args.factorize = False
if args.multiply:
mul = Multiply()
if not mul.preprocess_set(CnfSet, args.multiply[0], args.multiply[1]):
args.multiply = False
sys.exit(0)
# check if any clause evaluated to False after substitution
for cl in CnfSet.clauses:
if cl.value == False:
logger.info("The input set is NOT satisfiable with input factors.")
logger.info(f"The input numbers {args.multiply[0]} and {args.multiply[1]} can't be multiplied on the input CNF")
sys.exit(0)
# copy the CNF for the verification step (if needed), since the set will be renamed and manipulated later
originalCnf = deepcopy(CnfSet)
# start processing the root set
if len(CnfSet.clauses) > 0 or CnfSet.value != None:
PAT = PatternSolver(args=args, problem_id=CnfSet.get_hash().hex(), cluster_resources=cluster_resources, input_file=input_file_name)
PAT.solve_set(CnfSet)
# save solution in a file
if args.output_solution_file and PAT.solution:
solution = PAT.format_solution(PAT.solution)
file_name = PAT.problem_id
# if input is a file
if input_type != INPUT_SL:
file_name = os.path.splitext(input_content.name)[0]
file_name += '_' + args.mode
file_name += '.sol'
with open(file_name, 'w') as fout:
    fout.write(solution)
logger.info(f"Solution written to: {file_name}")
# verify the solution
if args.verify and PAT.solution:
if PAT.verify_solution(originalCnf, PAT.solution):
logger.info("Solution is VERIFIED!\n\n\n\n")
else:
logger.info("The solution is NOT correct! ****")
except Exception as e:
logger.critical("Error - {0}".format(str(e)))
logger.critical("Error - {0}".format(traceback.format_exc()))
if __name__ == "__main__":
start_time = time.time()
class Formatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter): pass
parser = argparse.ArgumentParser(description="NDP [OPTIONS]", formatter_class=argparse.RawTextHelpFormatter)
group1 = parser.add_mutually_exclusive_group()
group1.add_argument("-v", "--verbos", help="Verbos", action="store_true")
group1.add_argument("-vv", "--very-verbos", help="Very verbos", action="store_true")
group1.add_argument("-q", "--quiet", help="Quiet mode = no subprocess output.", action="store_true")
group1.add_argument("-qn", "--quiet-but-unique-nodes", help="Quiet mode. Except outputting number of unique nodes.", action="store_true")
group2 = parser.add_mutually_exclusive_group(required=True)
group2.add_argument("-l", "--line-input", type=str, help="Represent the input set in one line. Format: a|b|c&d|e|f ...")
group2.add_argument("-lf", "--line-input-file", type=argparse.FileType('r'), help="Represent the input set in one line stored in a file. Format: a|b|c&d|e|f ...")
group2.add_argument("-d", "--dimacs", type=argparse.FileType('r'), help="File name to contain the set in DIMACS format. See https://bit.ly/dimcasf")
parser.add_argument("-g", "--output-graph-file", type=str, help="Output graph file in Graphviz format")
parser.add_argument("-s", "--output-solution-file", action="store_true", help="Output solution file.")
parser.add_argument("-ns", "--no-stats", help="Short concise output - no stats - this will disable the global database option.", action="store_true")
parser.add_argument("-t", "--threads", type=int, help="Number of threads. Value 1 = no multithreading, 0 = max concurrent available threads. This option will implicitly enable the global DB.", default=0)
parser.add_argument("-e", "--exit-upon-solving", help="Exit whenever a solution is found.", action="store_true")
parser.add_argument("-verify", "--verify", help="Verify the solution at the end, if any.", action="store_true")
parser.add_argument("-rdb", "--use-runtime-db", help="Use database for set lookup in table established only for the current cnf", action="store_true")
parser.add_argument("-gdb", "--use-global-db", help="Use database for set lookup in global sets table", action="store_true")
parser.add_argument("-gnm", "--gdb-no-mem", help="Don't load hashes from global DB into memory. Only use if gdb gets huge and doesn't fit memory. (slower)", action="store_true")
parser.add_argument("-z", "--sort-by-size", help="Always sort clauses by size in ascending order.", action="store_true")
parser.add_argument("-sm", "--start-mode", help="Use mode while prepare sub-processes (options as -m)", choices=['flo', 'flop', 'lo', 'lou', 'normal'], default=None)
parser.add_argument("-thief", "--thief-method", help="VERY effizient for FACT of Purdom-Sabry input format: Always sort clauses by length and initial index.", action="store_true")
parser.add_argument("-fact", "--factorize", help="Factorize the input number if not prime.", action="store_true")
parser.add_argument("-mult", "--multiply", nargs=2, type=int, help="Multiply two numbers with bit-range. NOTE: will not generate total MULT-circuit!")
parser.add_argument("-m", "--mode", help=textwrap.dedent('''\nSolution modi:\n
L.O. condition = Linearily Ordered: all variables appear in the ascending order
L.O.U. condition = Linearily Ordered Unsorted: clause Set L.O. but unsorted\n
flo: all nodes converted to L.O. (default)
flop: all nodes converted to L.O. with clauses sorted per size.
lo: only the root node is converted to L.O. with the rest converted to L.O.U.
lou: all nodes converted to L.O.U. condition.
normal: no preprocessing except ascending sorting of VARs within each clause.\n
'''), choices=['flo', 'flop', 'lo', 'lou', 'normal'], default="flo")
parser.add_argument('--version', action='version', version='%(prog)s ') # can use GitPython to automatically get latest tag here
parser.add_argument("-b", "--bye-art", help="Opt-out of displaying ASCII art at the end.\n\n", action="store_true")
# The algorithm
# --------------
# START:
# if FLOP:
#     place unit clauses first
# rename VARs
# sort within clauses
# if FLO or FLOP or (L.O. root node only):
#     sort clauses
#
# check if the set meets the L.O. condition; if not, go back to START
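#
# Rough illustration of the steps above (the exact VAR-renaming scheme is defined in PatternSolver;
# here we simply assume variables are renumbered in order of first appearance):
#   input:                  (5|-7) & (3)
#   unit clauses first:     (3) & (5|-7)
#   rename + sort within:   (1) & (2|-3)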
args = parser.parse_args()
if args.quiet_but_unique_nodes:
args.quiet = True
if args.quiet:
logger.setLevel(logging.CRITICAL)
# if threads is set, enable gdb (automatic activation is currently disabled - see the note below)
# A long note regarding multithreading and gdb:
# ---------------------------------------------
# When we solve a problem using multiple threads/processes, each thread processes a subtree.
# All threads should check a common storage (gdb in this case) for common nodes. This check
# avoids processing a subtree that has already been processed by another thread, which is a major
# contribution of the theory behind the solution. However, to achieve that, each thread needs to
# query the DB for *every* node it processes. Let's call the time required for this operation D,
# and the time the thread needs to process a node P.
# Without the common-node check, a thread spends P x n to process the tree (n is the number of
# nodes in the tree), whereas with the common DB it needs (P + D) x n` (where n` is the number of
# unique nodes).
# So for the DB of common nodes to make sense, the cost of processing the common subtree(s) must
# be larger than D x n`. In other words, P x n`` > D x n`, where n`` is the size of the common
# subtree(s).
# That being said, it has been found that the common subtrees are almost always far smaller than
# the unique ones. Also, for most nodes - especially in LOU mode, where bringing a node to the
# L.O. condition requires only one iteration - P is so small that it is less than D.
# This leads to the conclusion that P x n`` < D x n`, so the cost of having common storage "in the
# current implementation" outweighs its benefit.
# Recommendations:
# - We need a common-node storage other than Postgres whenever checking for the existence of a
#   node costs less than processing it.
# - The above conclusion has been validated by experiment for LOU, and partially for LO. We need
#   more experiments for LO, FLO and FLOP, where processing a node can take longer than in LOU, to
#   draw the same conclusion; otherwise the gdb flag could be set automatically based on mode and
#   problem size.
# - For now, let's disable the automatic activation of gdb with multithreading.
#
# Otherwise, if we want to use gdb, we can explicitly set the command-line option to do so.
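#
# Illustrative (made-up) numbers for the inequality above: with a lookup cost D = 2 ms, a node
# processing cost P = 0.1 ms, n` = 1,000,000 unique nodes and n`` = 100,000 common-subtree nodes:
#     P x n`` = 0.1 ms x 100,000   = 10 s       (duplicated work the common DB would save)
#     D x n`  = 2 ms   x 1,000,000 = 2,000 s    (lookup overhead paid for it)
# i.e. the lookups cost far more than the duplicated work they avoid, which is why the automatic
# activation below is commented out.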
# if args.threads > 1:
# args.use_global_db = True
if args.threads < 0:
logger.info("Option -t must be a positive number.")
parser.print_help()
sys.exit(3)
# Determine the maximum number of CPUs available
max_cpus = os.cpu_count() if not ray.is_initialized() else int(cluster_resources.get("CPU", 1))
# Check if the -t argument is set and exceeds the maximum CPUs available
if args.threads and args.threads > max_cpus:
response = input(f"The specified number of threads (-t {args.threads}) exceeds the maximum available CPUs ({max_cpus}). "
"Would you like to use the maximum available CPUs instead? (y/n): ").strip().lower()
if response == "y":
args.threads = max_cpus
print(f"Setting number of threads to the maximum available CPUs: {max_cpus}")
else:
print("Please specify a lower value for -t or remove the -t option to use the maximum available CPUs.")
sys.exit(1)
# at least one input must be provided
if args.line_input == None and args.line_input_file == None and args.dimacs == None:
logger.info("No input provided. Please provide any of the input arguments.")
parser.print_help()
sys.exit(3)
# only one input must be provided
if (args.line_input and args.line_input_file) or (args.line_input and args.dimacs) or (args.dimacs and args.line_input_file):
logger.info("Please provide only one input.")
parser.print_help()
sys.exit(3)
# only use -gnm if -gdb is set
if args.gdb_no_mem and not args.use_global_db:
parser.error('-gnm/--gdb-no-mem MUST be used with -gdb/--use-global-db option')
if args.multiply and ((args.multiply[0] <= 1) or (args.multiply[1] <= 1)):
parser.error('-mult/--multiply option MUST be used with integers > 1')
if args.verbos:
logger.setLevel(logging.INFO)
elif args.very_verbos:
logger.setLevel(logging.DEBUG)
elif args.quiet:
logger.setLevel(logging.CRITICAL)
if args.start_mode is None:
args.start_mode = args.mode
Main(args)
if not args.bye_art:
display_ascii_art(bye_art)
logger.info('\nalien-tech at its best. but better.\n\n\n')
# Set the global logging level to CRITICAL to suppress lower-level logs
logging.getLogger().setLevel(logging.CRITICAL)