From d1e44a2af71c1d26910a9ef3f1dfdaf6995163f9 Mon Sep 17 00:00:00 2001 From: Dennis Terhorst Date: Wed, 15 Nov 2023 18:49:29 +0100 Subject: [PATCH] next part of refactoring UserDocExtractor into classes --- doc/htmldoc/_ext/extractor_userdocs.py | 149 ++++++++++--------------- 1 file changed, 57 insertions(+), 92 deletions(-) diff --git a/doc/htmldoc/_ext/extractor_userdocs.py b/doc/htmldoc/_ext/extractor_userdocs.py index 317f4e2f6f..a88d7c6267 100644 --- a/doc/htmldoc/_ext/extractor_userdocs.py +++ b/doc/htmldoc/_ext/extractor_userdocs.py @@ -39,7 +39,7 @@ from tqdm import tqdm -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__) @@ -235,6 +235,50 @@ def extract(self, filename: Path): return UserDoc(doc, tags, self._outdir / outname) + def CreateTagIndices(self) -> list[str]: + """ + This function generates all combinations of tags and creates an index page + for each combination using `rst_index`. + + Returns + ------- + + list + list of names of generated files. (relative to `_outdir`) + """ + tags = self.tagdict + taglist = list(tags.keys()) + maxtaglen = max([len(t) for t in tags]) + for tag, count in sorted([(tag, len(lst)) for tag, lst in tags.items()], key=lambda x: x[1]): + log.info(" %%%ds tag in %%d files" % maxtaglen, tag, count) + if "" in taglist: + taglist.remove("") + indexfiles = list() + depth = min(4, len(taglist)) # how many levels of indices to create at most + nindices = sum([comb(len(taglist), L) for L in range(depth - 1)]) + log.info("indices down to level %d → %d possible keyword combinations", depth, nindices) + for current_tags in tqdm( + chain(*[combinations(taglist, L) for L in range(depth - 1)]), unit="idx", desc="keyword indices", total=nindices + ): + current_tags = sorted(current_tags) + indexname = "index%s.rst" % "".join(["_" + x for x in current_tags]) + hier = make_hierarchy(tags.copy(), *current_tags) + if not any(hier.values()): + log.debug("index %s is empty!", str(current_tags)) + continue + #subtags = [set(subtag) for subtag in hier.values()] + #log.debug("subtags = %s", subtags) + #nfiles = len(set.union(*chain([set(subtag) for subtag in hier.values()]))) + #log.debug("%3d docs in index for %s...", nfiles, str(current_tags)) + log.debug("generating index for %s...", str(current_tags)) + indextext = rst_index(hier, current_tags) + with open(os.path.join(outdir, indexname), "w") as outfile: + outfile.write(indextext) + indexfiles.append(indexname) + log.info("%4d non-empty index files generated", len(indexfiles)) + return indexfiles + + def rewrite_short_description(doc, filename, short_description="Short description"): """ Modify a given text by replacing the first section named as given in @@ -488,53 +532,6 @@ def reverse_dict(tags): return revdict -def CreateTagIndices(tags, outdir="userdocs/"): - """ - This function generates all combinations of tags and creates an index page - for each combination using `rst_index`. - - Parameters - ---------- - - tags : dict - dictionary of tags - - outdir : str, path - path to the intended output directory (handed to `rst_index`. - - Returns - ------- - - list - list of names of generated files. - """ - taglist = list(tags.keys()) - maxtaglen = max([len(t) for t in tags]) - for tag, count in sorted([(tag, len(lst)) for tag, lst in tags.items()], key=lambda x: x[1]): - log.info(" %%%ds tag in %%d files" % maxtaglen, tag, count) - if "" in taglist: - taglist.remove("") - indexfiles = list() - depth = min(4, len(taglist)) # how many levels of indices to create at most - nindices = sum([comb(len(taglist), L) for L in range(depth - 1)]) - log.info("indices down to level %d → %d possible keyword combinations", depth, nindices) - for current_tags in tqdm( - chain(*[combinations(taglist, L) for L in range(depth - 1)]), unit="idx", desc="keyword indices", total=nindices - ): - current_tags = sorted(current_tags) - indexname = "index%s.rst" % "".join(["_" + x for x in current_tags]) - hier = make_hierarchy(tags.copy(), *current_tags) - if not any(hier.values()): - log.debug("index %s is empyt!", str(current_tags)) - continue - nfiles = len(set.union(*chain([set(subtag) for subtag in hier.values()]))) - log.debug("generating index for %s...", str(current_tags)) - indextext = rst_index(hier, current_tags) - with open(os.path.join(outdir, indexname), "w") as outfile: - outfile.write(indextext) - indexfiles.append(indexname) - log.info("%4d non-empty index files generated", len(indexfiles)) - return indexfiles class JsonWriter: @@ -589,42 +586,6 @@ def getTitles(text): return titles -#class ExtractUserDocs: -# def __init__(self, basedir="..", outdir="userdocs/"): -# self._basedir = basedir -# self._outdir = outdir -# -# def extract_from(self, listoffiles): -# """ -# Extract and build all user documentation and build tag indices. -# -# Writes extracted information to JSON files in outdir. In particular the -# list of seen tags mapped to files they appear in, and the indices generated -# from all combinations of tags. -# -# Parameters are the same as for `UserDocExtractor` and are handed to it -# unmodified. -# -# Returns -# ------- -# -# None -# """ -# data = JsonWriter(self._outdir) -# # Gather all information and write RSTs -# self._tags = UserDocExtractor(listoffiles, basedir=self._basedir, outdir=self._outdir) -# data.write(tags, "tags") -# -# indexfiles = CreateTagIndices(tags, outdir=self._outdir) -# data.write(indexfiles, "indexfiles") -# -# toc_list = [name[:-4] for names in tags.values() for name in names] -# idx_list = [indexfile[:-4] for indexfile in indexfiles] -# -# with open(os.path.join(self._outdir, "toc-tree.json"), "w") as tocfile: -# json.dump(list(set(toc_list)) + list(set(idx_list)), tocfile) - - def got_args(*names): "Returns True if sys.argv contains the given strings." if len(sys.argv) < len(names): @@ -637,26 +598,30 @@ def got_args(*names): def output_exit(result): json.dump(result, sys.stdout, indent=" ", check_circular=True) print() # add \n at the end. + if isinstance(result, list): + log.info("result list with %d entries", len(result)) sys.exit(0) + if __name__ == "__main__": globs = ("models/*.h", "nestkernel/*.h") basedir = ".." outdir = "userdocs/" log.debug("args: %s", repr(sys.argv)) + output = JsonWriter(outdir) files = relative_glob(*globs, basedir=basedir) if got_args("list", "files"): output_exit(files) extractor = UserDocExtractor(outdir=outdir, basedir=basedir) - tags = extractor.extract_all(files) + extractor.extract_all(files) + tags = extractor.tagdict if got_args("list", "tags"): output_exit(tags) - #data.write(tags, "tags") - # - #indexfiles = CreateTagIndices(tags, outdir=self._outdir) - #data.write(indexfiles, "indexfiles") - # - #toc_list = [name[:-4] for names in tags.values() for name in names] - #idx_list = [indexfile[:-4] for indexfile in indexfiles] + output.write(tags, "tags") + + indexfiles = extractor.CreateTagIndices() + if got_args("list", "indices"): + output_exit(indexfiles) + output.write(indexfiles, "indexfiles")