Skip to content

classifier

Classifier dataclass

Source code in c3p/classifier.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@dataclass
class Classifier:
    """
    Classify SMILES strings with the programs in a directory, optionally reusing cached results.
    """
    # Directory handed to the module-level ``classify`` function.
    program_directory: Path = PROGRAM_DIR
    # Forwarded to ``classify`` as its ``strict`` argument.
    strict: bool = False
    # Optional store used as ``key in cache`` / ``cache[key]`` / ``cache[key] = [...]``,
    # keyed by SMILES string and holding lists of results. ``None`` disables caching.
    cache: Optional[Cache] = None

    def classify_iter(self, smiles: Union[SMILES_STRING, List[SMILES_STRING]]) -> Iterator[ClassificationResult]:
        """
        Lazily classify a SMILES string or list of SMILES strings using all programs in the given directory.

        Args:
            smiles: A single SMILES string, or a list of SMILES strings.

        Returns:
            An iterator of classification results. When a cache is configured, cached
            results are yielded first (in input order), followed by freshly computed
            results for the SMILES strings that were not cached.
        """
        # A bare string must be wrapped: iterating it directly would walk its characters.
        smiles_list = [smiles] if isinstance(smiles, str) else list(smiles)
        cache = self.cache
        # Test against None, not truthiness: an empty cache is falsy but must still be filled.
        if cache is None:
            yield from classify(smiles_list, self.program_directory, strict=self.strict)
            return

        remaining_smiles = []
        for s in smiles_list:
            if s in cache:
                yield from cache[s]
            else:
                remaining_smiles.append(s)
        if not remaining_smiles:
            return

        # Buffer fresh results and only write them to the cache once classification has
        # run to completion. Results for one SMILES arrive interleaved across programs,
        # so a consumer that stops early (or an error in strict mode) would otherwise
        # leave a partial entry that later calls would serve as a complete cache hit.
        pending = {}
        for result in classify(remaining_smiles, self.program_directory, strict=self.strict):
            pending.setdefault(result.input_smiles, []).append(result)
            yield result
        for key, results in pending.items():
            previous = cache[key] if key in cache else []
            cache[key] = [*previous, *results]

    def classify(
        self, smiles: Union[SMILES_STRING, List[SMILES_STRING]]
    ) -> List[ClassificationResult]:
        """
        Classify a SMILES string or list of SMILES strings using all programs in the given directory.

        Args:
            smiles: A single SMILES string, or a list of SMILES strings.

        Returns:
            Every result produced by ``classify_iter`` for ``smiles``, collected into a list.
        """
        return list(self.classify_iter(smiles))

classify(smiles)

Classify a SMILES string or list of SMILES strings using all programs in the given directory.

Parameters:

| Name | Type | Description | Default |
| smiles | Union[SMILES_STRING, List[SMILES_STRING]] | | required |

Returns:

Source code in c3p/classifier.py
140
141
142
143
144
145
146
147
148
149
150
151
152
def classify(
    self, smiles: Union[SMILES_STRING, List[SMILES_STRING]]
) -> List[ClassificationResult]:
    """
    Eagerly classify a SMILES string or list of SMILES strings.

    Args:
        smiles: A single SMILES string, or a list of SMILES strings.

    Returns:
        All results produced by ``classify_iter`` for ``smiles``, as a list.
    """
    # Drain the lazy iterator so callers receive a concrete list.
    collected = []
    collected.extend(self.classify_iter(smiles))
    return collected

classify_iter(smiles)

Classify a SMILES string or list of SMILES strings using all programs in the given directory.

Parameters:

| Name | Type | Description | Default |
| smiles | Union[SMILES_STRING, List[SMILES_STRING]] | | required |

Returns:

Source code in c3p/classifier.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def classify_iter(self, smiles: Union[SMILES_STRING, List[SMILES_STRING]]) -> Iterator[ClassificationResult]:
    """
    Lazily classify a SMILES string or list of SMILES strings using all programs in the given directory.

    Args:
        smiles: A single SMILES string, or a list of SMILES strings.

    Returns:
        An iterator of classification results. When a cache is configured, cached
        results are yielded first (in input order), followed by freshly computed
        results for the SMILES strings that were not cached.
    """
    # A bare string must be wrapped: iterating it directly would walk its characters.
    smiles_list = [smiles] if isinstance(smiles, str) else list(smiles)
    cache = self.cache
    # Test against None, not truthiness: an empty cache is falsy but must still be filled.
    if cache is None:
        yield from classify(smiles_list, self.program_directory, strict=self.strict)
        return

    remaining_smiles = []
    for s in smiles_list:
        if s in cache:
            yield from cache[s]
        else:
            remaining_smiles.append(s)
    if not remaining_smiles:
        return

    # Buffer fresh results and only write them to the cache once classification has
    # run to completion. Results for one SMILES arrive interleaved across programs,
    # so a consumer that stops early (or an error in strict mode) would otherwise
    # leave a partial entry that later calls would serve as a complete cache hit.
    pending = {}
    for result in classify(remaining_smiles, self.program_directory, strict=self.strict):
        pending.setdefault(result.input_smiles, []).append(result)
        yield result
    for key, results in pending.items():
        previous = cache[key] if key in cache else []
        cache[key] = [*previous, *results]

check_class_membership(smiles_list, name, code, strict=False)

Check if the given SMILES strings belong to the class defined in the given code.

Parameters:

| Name | Type | Description | Default |
| smiles_list | List[SMILES_STRING] | | required |
| name | str | | required |
| code | str | | required |
| strict | | | False |

Returns:

Source code in c3p/classifier.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def check_class_membership(smiles_list: List[SMILES_STRING], name: str, code: str, strict=False) -> Iterator[ClassificationResult]:
    """
    Check if the given SMILES strings belong to the class defined in the given code.

    The first top-level ``def is_<something>(smiles...`` found in ``code`` is run
    once per SMILES string via ``run_code``.

    Args:
        smiles_list: The SMILES strings to test.
        name: Label for the program, used in log and error messages.
        code: Source text of the program containing the ``is_`` function.
        strict: If true, re-raise errors from running the program and raise when
            no ``is_`` function is found; otherwise such problems are logged and skipped.

    Returns:
        An iterator yielding one ClassificationResult per tuple produced by ``run_code``.

    Raises:
        ValueError: If ``strict`` is true and ``code`` contains no ``is_`` function.
    """
    import re

    # Match the name with \w+ rather than a greedy ``.*``: a later "(smiles" on the same
    # line (e.g. in a trailing comment) would otherwise be swallowed into the function name.
    fn_match = re.search(r"^def (is_\w+)[ \t]*\([ \t]*smiles", code, re.MULTILINE)
    if fn_match is None:
        if strict:
            raise ValueError(f"Could not find is_ function in {name}")
        # Non-strict mode skips the program, but leaves a trace instead of failing silently.
        logger.warning(f"Could not find is_ function in {name}")
        return

    function_name = fn_match.group(1)
    for smiles in smiles_list:
        try:
            logger.info(f"Running {function_name} in {name} for {smiles}")
            for _, satisfies, reason, metadata in run_code(code, function_name, [smiles], []):
                cc = metadata.get("chemical_class", {})
                if satisfies:
                    confidence = metadata.get("precision")
                else:
                    # NPV = TN / (TN + FN)
                    tn = metadata.get("num_true_negatives", 0)
                    fn = metadata.get("num_false_negatives", 0)
                    confidence = tn / (tn + fn) if tn + fn > 0 else None
                logger.info(f"{name} {function_name} {smiles} -> {satisfies}")
                yield ClassificationResult(
                    input_smiles=smiles,
                    class_id=cc.get("id", "-"),
                    class_name=cc.get("name", "-"),
                    is_match=satisfies,
                    reason=reason,
                    confidence=confidence
                )
        except Exception as e:
            if strict:
                # Bare raise keeps the original traceback intact.
                raise
            # Best effort: one failing SMILES must not stop the remaining ones.
            logger.error(f"Error running {function_name} in {name}: {e}")

classify(smiles, program_directory=None, chemical_classes=None, strict=False)

Classify a SMILES string or list of SMILES strings using all programs in the given directory.

Parameters:

| Name | Type | Description | Default |
| smiles | Union[SMILES_STRING, List[SMILES_STRING]] | The SMILES string to classify | required |
| program_directory | Optional[Path] | The directory containing the programs | None |
| chemical_classes | Optional[List[str]] | The classes to include | None |

Returns:

| Type | Description |
| Iterator[ClassificationResult] | The classification result |

Source code in c3p/classifier.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def classify(
        smiles: Union[SMILES_STRING, List[SMILES_STRING]],
        program_directory: Optional[Path] = None,
        chemical_classes: Optional[List[str]] = None,
        strict=False,
    ) -> Iterator[ClassificationResult]:
    """
    Classify a SMILES string or list of SMILES strings using all programs in the given directory.

    Every ``*.py`` file in the directory whose name does not start with ``__`` is read
    and passed, together with the SMILES strings, to ``check_class_membership``.

    Args:
        smiles: The SMILES string (or list of SMILES strings) to classify
        program_directory: The directory containing the programs; defaults to ``PROGRAM_DIR``
        chemical_classes: The classes to include; when empty or None all programs are run
        strict: Forwarded to ``check_class_membership``

    Returns:
        An iterator over the classification results, program by program
    """
    # find all programs in path
    if program_directory is None:
        program_directory = PROGRAM_DIR
    # Sorted so results come back in a stable order; glob order depends on the filesystem.
    programs = sorted(program_directory.glob("*.py"))
    logger.info(f"Found {len(programs)} programs in {program_directory}")
    if chemical_classes:
        logger.info(f"Filtering for classes: {chemical_classes}")
        chemical_classes = [safe_name(c) for c in chemical_classes]
    # Materialise non-string input: it is iterated once per program, so a one-shot
    # iterable would be exhausted after the first program.
    smiles_list = [smiles] if isinstance(smiles, str) else list(smiles)
    # load each program
    for program in programs:
        if program.name.startswith("__"):
            continue
        # ``stem`` drops only the final suffix; str.replace would strip every ".py" in the name.
        chemical_name = program.stem
        if chemical_classes and chemical_name not in chemical_classes:
            logger.debug(f"Skipping {chemical_name} as not in inclusion list: {chemical_classes}")
            continue
        logger.info(f"Running {chemical_name} on {len(smiles_list)} SMILES")
        # Python source is UTF-8 by default; do not depend on the platform locale encoding.
        with open(program, "r", encoding="utf-8") as f:
            code = f.read()
        yield from check_class_membership(smiles_list, chemical_name, code, strict=strict)