#!/usr/bin/env python3

import sys

# Type names read from the types file; filled in by the script entry point.
types = []
# Raw lines of the taxonomy file; filled in by the script entry point.
taxonomy = []
# Superclass names appended by find_superclasses() while it follows
# rdfs:subClassOf statements.
superclasses = []

# Syntax used when printing a selected subclass statement. One of:
#   "subclass" -> subclass("X", "Y").
#   "triple"   -> triple("X", subclass, "Y").
#   "turtle"   -> X    rdfs:subClassOf    Y  .
# Any other value makes the printing functions emit nothing.
OUTPUT_FORMAT = "subclass"

# Classes whose outgoing rdfs:subClassOf statements have already been printed
# by find_superclasses().  It keeps repeated calls for the same class from
# printing the same statements again and stops the recursion on cyclic input.
_expanded_classes = set()


def find_superclasses(taxonomy_lines, subclass):
    """Print the rdfs:subClassOf statements found by walking up from *subclass*.

    Every line of *taxonomy_lines* is split on whitespace; only lines of the
    form ``<class> rdfs:subClassOf <superclass> .`` (exactly four tokens) are
    considered.  Each statement whose subject is *subclass* is printed in the
    syntax selected by the module-level ``OUTPUT_FORMAT`` (nothing is printed
    for an unrecognised value), and the walk then continues recursively from
    every superclass that had not been collected before.

    Side effects: newly discovered superclasses are appended, in discovery
    order, to the module-level ``superclasses`` list.  A class is expanded at
    most once per process; later calls for the same class return immediately.

    Args:
        taxonomy_lines: iterable of taxonomy lines (re-iterated on recursion,
            so it must not be a one-shot iterator).
        subclass: name of the class whose ancestors are wanted.

    Returns:
        None.
    """
    if subclass in _expanded_classes:
        return
    _expanded_classes.add(subclass)

    # Guards against the same statement appearing twice in the input.
    printed_targets = set()

    for line in taxonomy_lines:
        parts = line.split()
        if len(parts) != 4 or parts[3] != ".":
            continue
        if parts[1] != "rdfs:subClassOf" or parts[0] != subclass:
            continue

        superclass = parts[2]
        if superclass in printed_targets:
            continue
        printed_targets.add(superclass)

        # Print the statement even when the superclass was already collected
        # through another path; otherwise a class that shares an ancestor with
        # a previously visited class would lose its link to that ancestor.
        if OUTPUT_FORMAT == "subclass":
            print(f'subclass("{subclass}", "{superclass}").')
        elif OUTPUT_FORMAT == "triple":
            print(f'triple("{subclass}", subclass, "{superclass}").')
        elif OUTPUT_FORMAT == "turtle":
            print(f'{subclass}    {parts[1]}    {superclass}  .')

        if superclass not in superclasses:
            superclasses.append(superclass)
            find_superclasses(taxonomy_lines, superclass)
    

def print_superclass_triples(taxonomy, types):
    """Print the rdfs:subClassOf statements whose subject is listed in *types*.

    Each line of *taxonomy* is split on whitespace; only lines of the form
    ``<subclass> rdfs:subClassOf <superclass> .`` (exactly four tokens) are
    considered.  For every such line whose subclass is in *types*,
    ``find_superclasses`` is called on the superclass first, and then the
    statement itself is printed in the syntax selected by ``OUTPUT_FORMAT``.
    Nothing is printed for an unrecognised ``OUTPUT_FORMAT``.

    Args:
        taxonomy: iterable of taxonomy lines (also handed to
            ``find_superclasses``).
        types: container of class names to select.

    Returns:
        None.
    """
    for statement in taxonomy:
        tokens = statement.strip().split()
        well_formed = len(tokens) == 4 and tokens[3] == "."
        if not well_formed or tokens[1] != "rdfs:subClassOf":
            continue

        child, predicate, parent = tokens[0], tokens[1], tokens[2]
        if child not in types:
            continue

        # The ancestors of the superclass are handled before the direct
        # statement is printed.
        find_superclasses(taxonomy, parent)

        if OUTPUT_FORMAT == "turtle":
            print(f'{child}    {predicate}    {parent}  .')
        elif OUTPUT_FORMAT == "triple":
            print(f'triple("{child}", subclass, "{parent}").')
        elif OUTPUT_FORMAT == "subclass":
            print(f'subclass("{child}", "{parent}").')




if __name__ == "__main__":
    # Command-line entry point:
    #   python select_taxonomy.py types_file taxonomy_file [output_file]
    # Reads the wanted types and the taxonomy, then writes the selected
    # subclass statements to output_file, or to stdout when it is omitted.

    # Local imports: only the command-line entry point needs them.
    import contextlib
    import io

    if len(sys.argv) not in (3, 4):
        # Usage errors go to stderr so they are visible (and do not end up in
        # the data) when stdout is redirected to a file.
        print("Usage: python select_taxonomy.py types_file taxonomy_file [output_file]",
              file=sys.stderr)
        print("Types file format (one RDF type name per line):", file=sys.stderr)
        print("  yago:RDF_Type_Name", file=sys.stderr)
        print("  yago:RDF_Type_Name2", file=sys.stderr)
        sys.exit(1)

    types_file = sys.argv[1]
    taxonomy_file = sys.argv[2]
    output_file = sys.argv[3] if len(sys.argv) == 4 else None
    file_output = output_file is not None

    # Turtle documents are UTF-8, so do not depend on the platform default.
    with open(types_file, 'r', encoding='utf-8') as f:
        # Blank lines can never match a class name, so they are left out.
        types.extend(name for name in (line.strip() for line in f) if name)

    with open(taxonomy_file, 'r', encoding='utf-8') as f:
        taxonomy.extend(f)

    # The selection functions print to stdout.  Capture their output with
    # redirect_stdout, which restores sys.stdout afterwards, instead of
    # rebinding sys.stdout to a file that is then closed.
    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        # First pass: statements for the requested types.
        print_superclass_triples(taxonomy, types)
        # Second pass: statements whose subject is one of the superclasses
        # collected so far.
        print_superclass_triples(taxonomy, superclasses)

    # The two passes can print the same statement more than once; keep only
    # the first occurrence of each line, in the original order.
    captured.seek(0)
    unique_lines = list(dict.fromkeys(captured))

    if file_output:
        with open(output_file, 'w', encoding='utf-8') as out:
            out.writelines(unique_lines)
        print(f'Output written to file: {output_file}')
    else:
        sys.stdout.writelines(unique_lines)