lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (19 lines):
- line 52: // TODO:
- line 81: // TODO: allow passing/wrapping arbitrary postings format?
- line 196: // TODO: maybe specialize into prx/no-prx/no-frq cases?
- line 262: // TODO: maybe make a separate builder? These are only
- line 758: // TODO: we should use the skip pointers; should be
- line 781: // TODO: we should use the skip pointers; should be
- line 838: // TODO: implement reuse
- line 841: // TODO: the logic of which enum impl to choose should be refactored to be simpler...
- line 1382: // TODO: add assert that we don't inc too many times
- line 1473: // TODO: implement reuse
- line 1476: // TODO: the logic of which enum impl to choose should be refactored to be simpler...
- line 1546: // TODO: can do this w/o setting members?
- line 1619: // TODO: can do this w/o setting members?
- line 1703: // TODO: can do this w/o setting members?
- line 1721: // TODO: store docID member?
- line 1733: // TODO: can I do postings[upto+1]?
- line 1766: // TODO: could do a better estimate
- line 1899: // TODO: could do a better estimate
- line 2075: // TODO: specialize offsets and not
lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (14 lines):
- line 578: // TODO: should we somehow make the seqNo available in the returned NRT reader?
- line 597: // TODO: we could instead just clone SIS and pull/incref readers in sync'd block, and
- line 1046: // TODO: maybe we could allow this? It's tricky...
- line 1248: // TODO: fix tests abusing this method!
- line 1758: // TODO: this is a slow linear search, but, number of
- line 2889: // TODO: not great we do this heavyish op while holding IW's monitor lock,
- line 2967: // TODO: addSuppressed? it could be many...
- line 4739: if (merge.isExternal) { // TODO can we simplify this and just throw all the time? this would
- line 4981: // TODO: in the non-pool'd case this is somewhat
- line 4993: // TODO: we could fix merging to pull the merged DV iterator so we don't have to move these
- line 5456: // TODO: ideally we would freeze merge.info here!!
- line 5870: // TODO: should we remove this method now that it's the Directory's job to retry deletions?
- line 5928: // TODO: this really should be a tragic
- line 5961: if (bufferedUpdates != null && bufferedUpdates.any()) { // TODO why can this be null?
lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/SegmentTermsEnumFrame.java (13 lines):
- line 150: // TODO: Could we know the number of bytes to prefetch?
- line 185: // TODO: if suffixes were stored in random-access
- line 242: // TODO: we could skip this if !hasTerms; but
- line 299: // TODO: skip this if !hasTerms? Then postings
- line 377: // TODO: make this array'd so we can do bin search?
- line 463: // TODO: better API would be "jump straight to term=N"???
- line 466: // TODO: we could make "tiers" of metadata, ie,
- line 472: // TODO: if docFreq were bulk decoded we could
- line 653: // TODO: not consistent that in the
- line 683: // TODO early terminate when target length unequals suffix + prefix.
- line 744: // TODO: not consistent that in the
- line 823: // TODO this
- line 867: // TODO: not consistent that in the
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/SegmentTermsEnumFrame.java (13 lines):
- line 147: // TODO: Could we know the number of bytes to prefetch?
- line 182: // TODO: if suffixes were stored in random-access
- line 239: // TODO: we could skip this if !hasTerms; but
- line 295: // TODO: skip this if !hasTerms? Then postings
- line 373: // TODO: make this array'd so we can do bin search?
- line 458: // TODO: better API would be "jump straight to term=N"???
- line 461: // TODO: we could make "tiers" of metadata, ie,
- line 467: // TODO: if docFreq were bulk decoded we could
- line 648: // TODO: not consistent that in the
- line 678: // TODO early terminate when target length unequals suffix + prefix.
- line 739: // TODO: not consistent that in the
- line 818: // TODO this
- line 862: // TODO: not consistent that in the
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnumFrame.java (11 lines):
- line 175: // TODO: if suffixes were stored in random-access
- line 243: // TODO: we could skip this if !hasTerms; but
- line 299: // TODO: skip this if !hasTerms? Then postings
- line 377: // TODO: make this array'd so we can do bin search?
- line 462: // TODO: better API would be "jump straight to term=N"???
- line 465: // TODO: we could make "tiers" of metadata, ie,
- line 471: // TODO: if docFreq were bulk decoded we could
- line 592: // TODO: binary search when all terms have the same length, which is common for ID fields,
- line 660: // TODO: not consistent that in the
- line 739: // TODO this
- line 782: // TODO: not consistent that in the
lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java (10 lines):
- line 149: // TODO: What's the right behavior when a collector throws CollectionTerminatedException?
- line 350: // TODO: maybe a class like BS, instead of parallel arrays
- line 416: // TODO: single-valued dims will always be true
- line 472: // TODO: we could jump slot0 forward to the
- line 488: // TODO: factor this out & share w/ union scorer,
- line 501: // TODO: single-valued dims will always be true
- line 517: // TODO: sometimes use advance?
- line 558: // TODO: maybe a class like BS, instead of parallel arrays
- line 652: // TODO: single-valued dims will always be true
- line 709: // TODO: we could "fix" faceting of the sideways counts
lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java (10 lines):
- line 177: // TODO: if suffixes were stored in random-access
- line 214: // TODO: we could skip this if !hasTerms; but
- line 276: // TODO: skip this if !hasTerms? Then postings
- line 340: // TODO: make this array'd so we can do bin search?
- line 422: // TODO: make this array'd so we can do bin search?
- line 501: // TODO: better API would be "jump straight to term=N"???
- line 504: // TODO: we could make "tiers" of metadata, ie,
- line 510: // TODO: if docFreq were bulk decoded we could
- line 705: // TODO: not consistent that in the
- line 850: // TODO: not consistent that in the
lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (10 lines):
- line 1246: // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
- line 1445: // TODO: we should probably return our own stats thing...?!
- line 1469: // TODO: add a separate test to check this for different reader impls
- line 1480: // TODO: really the codec should not return a field
- line 2597: // TODO: we should go and verify term vectors match, if the Level is high enough to
- line 3754: // TODO: we could add stats to DVs, e.g. total doc count w/ a value for this field
- line 3984: // TODO: we could add stats to DVs, e.g. total doc count w/ a value for this field
- line 4065: // TODO: testTermsIndex
- line 4088: // TODO: can we make a IS(FIR) that searches just
- line 4229: // TODO: these are too anal...?
lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java (9 lines):
- line 157: // TODO: if suffixes were stored in random-access
- line 186: // TODO: we could skip this if !hasTerms; but
- line 244: // TODO: skip this if !hasTerms? Then postings
- line 305: // TODO: make this array'd so we can do bin search?
- line 391: // TODO: better API would be "jump straight to term=N"???
- line 394: // TODO: we could make "tiers" of metadata, ie,
- line 400: // TODO: if docFreq were bulk decoded we could
- line 597: // TODO: not consistent that in the
- line 736: // TODO: not consistent that in the
lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (9 lines):
- line 361: // TODO: we may want an alternate mode here which is
- line 473: // TODO: maybe we should store common prefix
- line 640: // TODO: cutover to something better for these ints! simple64?
- line 720: // TODO: if ord is in same terms block and
- line 766: // TODO: we still lazy-decode the byte[] for each
- line 822: // TODO: cutover to random-access API
- line 830: // TODO: better API would be "jump straight to term=N"???
- line 833: // TODO: we could make "tiers" of metadata, ie,
- line 839: // TODO: if docFreq were bulk decoded we could
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java (8 lines):
- line 63: // TODO rename to SpanHighlighting ?
- line 100: // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
- line 139: return true; // TODO set to false and provide a hook to customize certain queries.
- line 169: Set fieldNameSet = new HashSet<>(); // TODO reuse. note: almost always size 1
- line 228: // TODO avoid searcher and do what it does to rewrite & get weight?
- line 279: // TODO limit to a capped endOffset length somehow so we can break this loop early
- line 307: throw new UnsupportedOperationException(); // TODO merge them
- line 371: // TODO perhaps optionally collect (and expose) payloads?
lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java (8 lines):
- line 56: TODO:
- line 348: // TODO: try writing the leading vLong in MSB order
- line 403: // TODO: maybe we could add bulk-add method to
- line 622: // TODO: cutover to bulk int codec... simple64?
- line 687: // TODO: now that terms dict "sees" these longs,
- line 734: // TODO: we could block-write the term suffix pointers;
- line 771: // TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on
- line 832: // TODO: if pending.size() is already 1 with a non-zero prefix length
gradle/validation/ecj-lint/ecj.javadocs.prefs (7 lines):
- line 14: # TODO: look into it and see if it can provide value, lots of errors
- line 25: # TODO: disabled because we do have api leaks in modules
- line 32: # TODO: disabled by default: seems productive but there are currently cleanups/exceptions
- line 118: # TODO: resource-related warning that is normally enabled by default
- line 122: # TODO: normally enabled by default: warns of unrecognized SuppressWarnings token
- line 131: # TODO: ideally, we shouldn't rely on these... but we do.
- line 149: # TODO: normally enabled by default: warns of unnecessary SuppressedWarnings token
lucene/core/src/java/org/apache/lucene/util/fst/FST.java (7 lines):
- line 41: // TODO: break this into WritableFST and ReadOnlyFST.. then
- line 46: // TODO: if FST is pure prefix trie we can do a more compact
- line 82: // TODO: we can free up a bit if we can nuke this:
- line 903: // TODO: can't assert this because we call from readFirstArc
- line 964: // TODO: would be nice to make this lazy -- maybe
- line 1003: // TODO: could we somehow [partially] tableize arc lookups
- line 1215: // TODO: really we should encode this as an arc, arriving
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (7 lines):
- line 77: // TODO:
- line 344: // TODO: use threads?
- line 410: // TODO: should use an EdgeNGramTokenFilterFactory here
- line 608: // TODO: if we had a BinaryTermField we could fix
- line 722: // TODO: we could allow blended sort here, combining
- line 789: // TODO: maybe just stored fields? they compress...
- line 922: // TODO: apps can try to invert their analysis logic
lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (6 lines):
- line 42: @IgnoreRandomChains(reason = "TODO: it seems to mess up offsets!?")
- line 166: // TODO: cutover to enum
- line 218: // TODO: how to know how much whitespace to add
- line 243: // TODO: this is inefficient
- line 266: // TODO: how to know how much whitespace to add
- line 286: // TODO: this is inefficient
lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonQuery.java (6 lines):
- line 147: // TODO: should we add "eps back to initial node" for all states,
- line 229: // TODO: what really am I supposed to do with the incoming field...
- line 300: // TODO: refactor & share with Automaton.toDot!
- line 343: // TODO: should we impl rewrite to return BooleanQuery of PhraseQuery,
- line 504: // TODO: can PhraseQuery really handle multiple terms at the same position? If so, why do we
- line 570: // TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's
lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java (6 lines):
- line 53: TODO:
- line 326: // TODO: try writing the leading vLong in MSB order
- line 394: // TODO: maybe we could add bulk-add method to
- line 707: // TODO: now that terms dict "sees" these longs,
- line 757: // TODO: we could block-write the term suffix pointers;
- line 869: // TODO: if pending.size() is already 1 with a non-zero prefix length
dev-tools/scripts/releaseWizard.py (6 lines):
- line 225: print("Loaded TODO definitions from releaseWizard.yaml")
- line 265: return dumper.represent_yaml_object(cls.yaml_tag, new_data, cls, flow_style=cls.yaml_flow_style) # TODO: fix me # pyright: ignore[reportUnknownMemberType]
- line 270: return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") # TODO: fix me # pyright: ignore[reportUnknownMemberType]
- line 271: return dumper.represent_scalar("tag:yaml.org,2002:str", data) # TODO: fix me # pyright: ignore[reportUnknownMemberType]
- line 420: print("Cleared RC TODO state")
- line 1072: yaml.Dumper.ignore_aliases = lambda self, data: True # TODO: fix me # pyright: ignore[reportUnknownLambdaType]
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/recorders/CountFacetRecorder.java (6 lines):
- line 36: * TODO: add an option to keep counts in an array, to improve performance for facets with small
- line 44: * TODO: We can also consider collecting 2 (3, 4, ..., can be parametrizes) slices to a single
- line 49: * TODO: If we come back to some for of synchronized count maps, we should be more careful what
- line 73: // TODO: we are planning to do some experiments with how hash maps are assigned to leaf or slice
- line 83: // TODO: even if this is called before collection started, we want it to use results from the
- line 119: // TODO: do we need empty map by default?
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java (6 lines):
- line 34: * TODO: support "total count" facet ordinal - to be able to return {@link
- line 43: // TODO: refactor - weird that we have both multi and single here.
- line 133: // TODO: We're going to do this again in the constructor. Can't we come up with a clever way to
- line 158: // TODO: we need it only for overlapping ranges, should not handle it in advanceExact for
- line 222: // TODO: does it make sense to return something else?
- line 314: * TODO: dedup
lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java (6 lines):
- line 552: // TODO: there must be a simpler way?
- line 595: // TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
- line 631: // TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on
- line 839: // TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
- line 1186: // TODO: minor opto: we don't really have to write the actual common prefixes, because
- line 1286: // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (6 lines):
- line 42: // TODO: this sentence is too long for the class summary.
- line 63: // TODO: TopGroups.merge() won't work with TopGroups returned by this collector, because
- line 77: // TODO: specialize into 2 classes, static "create" method:
- line 249: // TODO: maybe allow no sort on retrieving groups? app
- line 304: .newCollector(); // TODO: disable exact counts?
- line 329: // TODO: we could aggregate scores across children
lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/IntersectTermsEnumFrame.java (5 lines):
- line 29: // TODO: can we share this with the frame in STE?
- line 233: // TODO: maybe add scanToLabel; should give perf boost
- line 284: // TODO: better API would be "jump straight to term=N"???
- line 287: // TODO: we could make "tiers" of metadata, ie,
- line 293: // TODO: if docFreq were bulk decoded we could
lucene/core/src/java/org/apache/lucene/document/LatLonDocValuesQuery.java (5 lines):
- line 66: // TODO: line queries do not support within relations
- line 190: return 1000f; // TODO: what should it be?
- line 213: return 1000f; // TODO: what should it be?
- line 236: return 1000f; // TODO: what should it be?
- line 268: return 1000f; // TODO: what should it be?
lucene/core/src/java/org/apache/lucene/util/PagedBytes.java (5 lines):
- line 30: // TODO: refactor this, byteblockpool, fst.bytestore, and any
- line 37: // TODO: these are unused?
- line 121: // TODO: this really needs to be refactored into fieldcacheimpl
- line 209: // TODO: we could also support variable block sizes
- line 264: // TODO: this really needs to be refactored into fieldcacheimpl!
lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/CompositeSpatialStrategy.java (5 lines):
- line 47: // TODO support others? (BBox)
- line 51: // TODO support others?
- line 95: // TODO consider indexing center-point in DV? Guarantee contained by the shape, which could
- line 113: // TODO resurrect Disjoint spatial query utility accepting a field name known to have
- line 144: // TODO add args.clone method with new predicate? Or simply make non-final?
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java (5 lines):
- line 218: // TODO: use ShingleAnalyzerWrapper?
- line 265: // TODO: if only we had IndexOptions.TERMS_ONLY...
- line 462: // TODO: this is somewhat iffy; today, ShingleFilter
- line 555: // TODO: we could add fuzziness here
- line 573: // TODO: we could do this division at build time, and
lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/Lucene103BlockTreeTermsWriter.java (5 lines):
- line 52: TODO:
- line 188: *
- line 757: // TODO: cutover to bulk int codec... simple64?
- line 836: // TODO: now that terms dict "sees" these longs,
- line 1069: // TODO: if pending.size() is already 1 with a non-zero prefix length
lucene/queries/src/java/org/apache/lucene/queries/function/FunctionValues.java (5 lines):
- line 64: // TODO: should we make a termVal, returns BytesRef?
- line 82: * returns the bytes representation of the string val - TODO: should this return the indexed raw
- line 108: * @return the sort ordinal for the specified doc TODO: Maybe we can just use intVal for this...
- line 193: // TODO: should we make a termVal, fills BytesRef[]?
- line 227: // TODO: change "reader" to LeafReaderContext
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/LongValueFacetCutter.java (5 lines):
- line 35: * TODO: This class is quite inefficient. Will optimise later.
- line 37: * TODO: add support for other value sources e.g: LongValues
- line 43: // TODO: consider alternatives if this is a bottleneck
- line 115: * TODO: we need it to tie break sort by value. Alternatively we can sort by label (then we
- line 124: // TODO: do we want to create #finish method that called by #reduce to build the map?
lucene/core/src/java/org/apache/lucene/document/NearestNeighbor.java (5 lines):
- line 222: // TODO: can we somehow share more with, or simply directly use, the
- line 238: // TODO: if we used lucene's PQ we could just updateTop instead of poll/offer:
- line 281: // TODO: if we replace approxBestDistance with actualBestDistance, we can put an opto here to
- line 310: // TODO: we are assuming a binary tree
- line 356: // TODO: can we make this the trueBestDistance? I.e., minimum distance between the point and
lucene/core/src/java/org/apache/lucene/util/fst/FSTSuffixNodeCache.java (5 lines):
- line 24: // TODO: any way to make a reverse suffix lookup (msokolov's idea) instead of more costly hash?
- line 31: // TODO: couldn't we prune naturally back until we see a transition with an output? it's highly
- line 195: // TODO: we could clear & reuse the previous fallbackTable, instead of allocating a new
- line 221: // TODO: maybe if number of arcs is high we can safely subsample?
- line 342: // TODO: https://github.com/apache/lucene/issues/12744
lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java (5 lines):
- line 29: // TODO: can we share this with the frame in STE?
- line 235: // TODO: maybe add scanToLabel; should give perf boost
- line 288: // TODO: better API would be "jump straight to term=N"???
- line 291: // TODO: we could make "tiers" of metadata, ie,
- line 297: // TODO: if docFreq were bulk decoded we could
lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java (5 lines):
- line 58: // TODO: in some cases we can filter by length? eg
- line 85: // TODO: if the automaton is "smallish" we really
- line 173: // TODO: we could be more efficient for the next()
- line 194: // TODO: do we need ord() here? OrdsIntersectTermsEnumFrame tracks termOrd but it may be buggy!
- line 459: // TODO: maybe we should do the same linear test
lucene/core/src/java/org/apache/lucene/store/FSDirectory.java (5 lines):
- line 272: // TODO: to improve listCommits(), IndexFileDeleter could call this after deleting segments_Ns
- line 317: // TODO: we could fix IndexInputs from FSDirectory subclasses to call this when they are
- line 347: // TODO: can we remove this OS-specific hacky logic? If windows deleteFile is buggy, we
- line 364: // TODO: this is hacky/lenient (we don't know which IOException this is), and
- line 368: // TODO: can/should we do if (Constants.WINDOWS) here, else throw the exc?
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/IntersectTermsEnumFrame.java (5 lines):
- line 30: // TODO: can we share this with the frame in STE?
- line 244: // TODO: maybe add scanToLabel; should give perf boost
- line 295: // TODO: better API would be "jump straight to term=N"???
- line 298: // TODO: we could make "tiers" of metadata, ie,
- line 304: // TODO: if docFreq were bulk decoded we could
lucene/core/src/java/org/apache/lucene/util/fst/Util.java (5 lines):
- line 49: // TODO: would be nice not to alloc this on every lookup
- line 70: // TODO: maybe a CharsRef version for BYTE2
- line 78: // TODO: would be nice not to alloc this on every lookup
- line 301: // TODO: we could enable FST to sorting arcs by weight
- line 305: // TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!?
lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java (5 lines):
- line 119: // TODO: should we return a status here (SEEK_FOUND / SEEK_NOT_FOUND /
- line 127: // TODO: possibly caller could/should provide common
- line 320: // TODO: possibly caller could/should provide common
- line 601: // TODO: if each arc could somehow read the arc just
- line 644: // TODO: possibly caller could/should provide common
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnumFrame.java (5 lines):
- line 30: // TODO: can we share this with the frame in STE?
- line 254: // TODO: maybe add scanToLabel; should give perf boost
- line 305: // TODO: better API would be "jump straight to term=N"???
- line 308: // TODO: we could make "tiers" of metadata, ie,
- line 314: // TODO: if docFreq were bulk decoded we could
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (5 lines):
- line 40: * TODO - report into Excel and/or graphed view.
- line 41: * TODO - perf comparison between Lucene releases over the years.
- line 42: * TODO - perf report adequate to include in Lucene nightly build site? (so we can easily
- line 44: * TODO - add overall time control for repeated execution (vs. current by-count only).
- line 45: * TODO - query maker that is based on index statistics.
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TermVectorFilteredLeafReader.java (4 lines):
- line 75: // TODO delegate size() ?
- line 77: // TODO delegate getMin, getMax to filterTerms
- line 95: // TODO: track the last term state from the term state method and do some potential
- line 104: // TODO delegate docFreq & ttf (moveToCurrentTerm() then call on full?
lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (4 lines):
- line 74: // TODO: maybe we should resolve token -> wordID then run
- line 77: // TODO: a more efficient approach would be Aho/Corasick's
- line 109: // TODO: we should set PositionLengthAttr too...
- line 539: // TODO: maybe just a PendingState class, holding
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SpatialDocMaker.java (4 lines):
- line 96: // TODO once strategies have factories, we could use them here.
- line 103: // TODO add more as-needed
- line 158: // TODO remove previous round config?
- line 239: // TODO consider abusing the 'size' notion to number of shapes per document
lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java (4 lines):
- line 100: // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
- line 230: // TODO: we can process the updates per DV field, from last to first so that
- line 255: // TODO: we traverse the terms in update order (not term order) so that we
- line 264: // TODO: we could at least *collate* by field?
lucene/suggest/src/java/org/apache/lucene/search/suggest/document/FuzzyCompletionQuery.java (4 lines):
- line 161: // TODO Accumulating all refs is bad, because the resulting set may be very big.
- line 162: // TODO Better iterate over automaton again inside FuzzyCompletionWeight?
- line 177: // TODO: maybe add alphaMin to LevenshteinAutomata,
- line 201: // TODO: we could call toLevenshteinAutomata() before det?
lucene/core/src/java/org/apache/lucene/util/UnicodeUtil.java (4 lines):
- line 106: }); // TODO this is unrelated here find a better place for it
- line 475: // TODO: ints must not be null, should be an assert
- line 519: // TODO: this may read past utf8's limit.
- line 637: // TODO: broken if chars.offset != 0
lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/TrieBuilder.java (4 lines):
- line 33: * TODO make this trie builder a more memory efficient structure.
- line 284: // TODO if we have only one child and no output, we can store child labels in this node.
- line 503: * TODO: Can we use VectorAPI to speed up the lookup? we can check 64 labels once on AVX512!
- line 544: * TODO: Can we use VectorAPI to speed up the lookup? we can check 64 labels once on AVX512!
lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java (4 lines):
- line 112: // TODO: yet another option is to count all segs
- line 203: // TODO: yet another option is to count all segs
- line 279: // TODO: is this right? really, we need a way to
- line 310: // TODO: is this right? really, we need a way to
lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java (4 lines):
- line 86: * TODO: merge this with runAutomaton
- line 265: // TODO: this is a bit fragile because if the automaton is not minimized there could be more
- line 282: // TODO: use binary search here
- line 341: // TODO: should this take startTerm too? This way
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/AbstractVisitingPrefixTreeQuery.java (4 lines):
- line 101: // TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator TODO
- line 186: visitLeaf(indexedCell); // TODO or query cell? Though shouldn't matter.
- line 196: visitLeaf(indexedCell); // TODO or query cell? Though shouldn't matter.
- line 221: // TODO use termsEnum.docFreq() as heuristic
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java (4 lines):
- line 254: // TODO: We don't need this if we're okay with having an integer -1 in the results even for float
- line 404: // TODO: It would be nice if TaxonomyReader let us pass in a buffer + size so we didn't have to
- line 476: // TODO: would be faster if we had a "get the following children" API? then we
- line 651: // TODO: We could consider indexing dim counts directly if getTopDims is a common
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsWriter.java (4 lines):
- line 168: // TODO: should we try skipping every 2/4 blocks...?
- line 358: // TODO: wasteful we are counting this (counting # docs
- line 398: // TODO: should we send offsets/payloads to
- line 514: // TODO: add a finish() at least to PushBase? DV too...?
lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (4 lines):
- line 609: // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
- line 626: // TODO: ideally we would freeze newSegment here!!
- line 644: // TODO: we should prune the segment if it's 100%
- line 647: // TODO: in the NRT case it'd be better to hand
lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java (4 lines):
- line 61: private final SortedMap fieldToReader = new TreeMap<>(); // TODO needn't sort?
- line 121: // TODO: make this read-only in a cleaner way?
- line 160: // TODO consider populating 1st leaf with vectors even if the field name has been seen on
- line 165: // TODO consider populating 1st leaf with terms even if the field name has been seen on a
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java (4 lines):
- line 71: // TODO: in some cases we can filter by length? eg
- line 100: // TODO: if the automaton is "smallish" we really
- line 190: // TODO: we could be more efficient for the next()
- line 498: // TODO: maybe we should do the same linear test
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (4 lines):
- line 235: // TODO: should we use a more optimized Codec?
- line 361: // TODO: share per-segment TermsEnum here!
- line 623: // TODO: share per-segment TermsEnum here!
- line 715: // TODO: share per-segment TermsEnum here!
lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java (4 lines):
- line 265: // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can
- line 299: // TODO: reverse vLong byte order for better FST
- line 685: // TODO: we should write our vLong backwards (MSB
- line 706: // TODO: we could save the outputs in local
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/IntersectTermsEnum.java (4 lines):
- line 71: // TODO: in some cases we can filter by length? eg
- line 100: // TODO: if the automaton is "smallish" we really
- line 192: // TODO: we could be more efficient for the next()
- line 504: // TODO: maybe we should do the same linear test
lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (4 lines):
- line 282: // TODO: reuse
- line 293: // TODO: make it constant-time
- line 402: // TODO: reuse
- line 410: // TODO: reuse
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (4 lines):
- line 562: // TODO: I think we can avoid the extra 2 bytes when
- line 831: // TODO: for fuzzy case would be nice to return
- line 886: // TODO: is there a Reader from a CharSequence?
- line 895: // TODO: we can optimize this somewhat by determinizing
lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java (4 lines):
- line 445: // TODO: move to test-framework?
- line 459: // TODO: move to test-framework?
- line 468: // TODO: move to test-framework?
- line 716: // TODO (LUCENE-9983): these int sets really do not need to be sorted, and we are paying
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50PostingsReader.java (4 lines):
- line 835: // TODO: make frq block load lazy/skippable
- line 901: // TODO: in theory we could avoid loading frq block
- line 1255: // TODO: in theory we could avoid loading frq block
- line 1703: // TODO: in theory we could avoid loading frq block
lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java (4 lines):
- line 280: // TODO: reverse vLong byte order for better FST
- line 567: // TODO: we should write our vLong backwards (MSB
- line 588: // TODO: we could save the outputs in local
- line 1181: // TODO: this is similar to Util.getByOutput ... can we refactor/share?
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java (4 lines):
- line 169: this.searcher = indexSearcher; // TODO: make non nullable
- line 999: // TODO reconsider the return type; since this is an "advanced" method, lets not return a Map?
- line 1095: // TODO make content a List? and return a List? and ensure getEmptyHighlight is never invoked
- line 1567: // TODO: useQueryBoosts
lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java (4 lines):
- line 64: return null; // TODO: add support?
- line 69: return null; // TODO: add support?
- line 74: return null; // TODO: add support?
- line 79: return null; // TODO: add support?
lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java (4 lines):
- line 45: // TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we
- line 47: // TODO: use more efficient packed ints structures?
- line 306: // TODO: we could specialize this case (the while loop is not needed when the ords
- line 435: // TODO: would be nice to return the ordinal and segment maps too, but it's not straightforward
lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java (4 lines):
- line 895: // TODO: if we fixed each partition step to just record the file offset at the "split point", we
- line 935: // TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on
- line 1566: // TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
- line 2007: // TODO: minor opto: we don't really have to write the actual common prefixes, because
lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java (4 lines):
- line 75: // TODO: why is this package private?
- line 154: // TODO: we should make this final as it is called in the constructor
- line 474: // TODO: we should use ReaderUtil+seekExact, we dont care about the docFreq
- line 545: // TODO: this isn't that great, maybe in the future SpellChecker should take
lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java (4 lines):
- line 82: // TODO: safe to always assume there are counts, but maybe it would be more optimal to
- line 125: // TODO: yet another option is to count all segs
- line 281: // TODO: is this right? really, we need a way to
- line 324: // TODO: is this right? really, we need a way to
lucene/core/src/java/org/apache/lucene/index/IndexingChain.java (4 lines):
- line 449: // TODO: catch missing DV fields here? else we have
- line 631: // TODO: for broken docs, optimize termsHash.finishDocument
- line 831: // TODO: should this be checked when a fieldType is created?
- line 1328: // TODO: maybe add some safety? then again, it's already checked
lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/IntersectTermsEnum.java (4 lines):
- line 66: // TODO: in some cases we can filter by length? eg
- line 96: // TODO: if the automaton is "smallish" we really
- line 185: // TODO: we could be more efficient for the next()
- line 492: // TODO: maybe we should do the same linear test
lucene/core/src/java/org/apache/lucene/search/BooleanScorerSupplier.java (3 lines):
- line 194: // TODO: what is the right heuristic here?
- line 198: // TODO: is there actually a threshold under which we should rather
- line 221: // TODO: there are some cases where BooleanScorer
lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java (3 lines):
- line 313: statesSet.reset(); // TODO: fork IntHashSet from hppc instead?
- line 318: // TODO: binary search should be faster, since transitions are sorted
- line 369: .sort(); // TODO: could use a PQ (heap) instead, since transitions for each state are
dev-tools/scripts/releaseWizard.yaml (3 lines):
- line 50: In the first TODO step in the checklist you will be asked to read up on the
- line 154: # You can reference state saved from earlier TODO items using syntax
- line 163: # persist_vars: ['var_name', 'var_name'] # List of variables to persist in TODO state
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84PostingsReader.java (3 lines):
- line 938: // TODO: in theory we could avoid loading frq block
- line 1482: // TODO: in theory we could avoid loading frq block
- line 1929: // TODO: in theory we could avoid loading frq block
lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (3 lines):
- line 238: // TODO: maybe we should consume and close it? Why does it need to stay open?
- line 359: // TODO: convert to a switch?
- line 1118: // TODO: the flags themselves can be double-chars (long) or also numeric
lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java (3 lines):
- line 280: // TODO: also use this for merging, instead of having to write through to disk first
- line 376: // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
- line 420: // TODO: can't this just be NumericDocValues now? avoid boxing the long value...
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java (3 lines):
- line 52: TODO Improvements:
- line 183: // TODO consider also doing fast-path if field is <= hours even if before greg change date
- line 226: || field >= Calendar.HOUR_OF_DAY) // TODO make configurable
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java (3 lines):
- line 561: // Shared: (TODO put this in a new class)
- line 783: // TODO benchmark if this optimization pays off. We avoid two comparePrefixLV calls.
- line 876: // TODO override nextFrom to be more efficient
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnum.java (3 lines):
- line 356: // TODO: reverse vLong byte order for better FST
- line 643: // TODO: we should write our vLong backwards (MSB
- line 664: // TODO: we could save the outputs in local
lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (3 lines):
- line 31: * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
- line 87: * TODO:Consider adding an option to not emit unigram stopwords as in CDL XTF BigramStopFilter,
- line 90: * TODO: Consider optimizing for the case of three commongrams i.e "man of the year" normally
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java (3 lines):
- line 59: * TODO: it's identical to private OverlappingLongRangeCounter#buildElementaryIntervals, let's
- line 162: * TODO: dedup OverlappingMultivaluedRangeLeafFacetCutter and
- line 238: // TODO: for single valued we can rollup after collecting all documents, e.g. in reduce
lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java (3 lines):
- line 39: // TODO: maybe we should resolve token -> wordID then run
- line 42: // TODO: a more efficient approach would be Aho/Corasick's
- line 451: // TODO: we could encode this instead into the FST:
lucene/core/src/java/org/apache/lucene/document/LatLonPoint.java (3 lines):
- line 86: // TODO ^^^ that is very sandy and hurts the API, usage, and tests tremendously, because what the
- line 228: // and should not drag in extra bogus junk! TODO: should encodeCeil just throw
- line 405: // TODO: what about multi-valued documents? what happens?
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java (3 lines):
- line 62: grid.getMaxLevels() - 4; // TODO this default constant is dependent on the prefix grid size
- line 77: // TODO if negative then subtract from maxlevels
- line 232: // TODO: AVPTQ will still scan the bottom nonetheless; file an issue to eliminate that
lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (3 lines):
- line 82: // TODO: allow null groupSort to mean "by relevance",
- line 201: // TODO: should we add option to mean "ignore docs that
- line 292: // TODO: optimize this
lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java (3 lines):
- line 146: return 100; // TODO: use cost of exactIterator.advance() and
- line 168: // TODO refactor AVPTQ to not be a Query?
- line 188: // TODO consider if IntersectsPrefixTreeQuery should simply do this and provide both sets
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java (3 lines):
- line 130: // TODO use RPT's configured scan level? Do we know better here? Hard to say.
- line 134: // traversal code. TODO consider refactoring if/when it makes sense (more use cases than this)
- line 175: // TODO this opt should move to VisitorTemplate (which contains an optimization TODO to
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java (3 lines):
- line 203: // TODO: right now there's no penalty for fuzzy/edits,
- line 250: // TODO: maybe add alphaMin to LevenshteinAutomata,
- line 274: // TODO: we could call toLevenshteinAutomata() before det?
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/Lucene99PostingsReader.java (3 lines):
- line 928: // TODO: in theory we could avoid loading frq block
- line 1467: // TODO: in theory we could avoid loading frq block
- line 1915: // TODO: in theory we could avoid loading frq block
lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Viterbi.java (3 lines):
- line 146: // TODO: should return meaningful value?
- line 186: // TODO: sort of silly to make Token instances here; the
- line 349: // TODO we should extract the penalty (left-space-penalty-factor) from the dicrc file.
lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (3 lines):
- line 44: // TODO: currently we encode all terms between two indexed
- line 324: // TODO: cutover to better intblock codec, instead
- line 334: // TODO: cutover to better intblock codec. simple64?
lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90PostingsReader.java (3 lines):
- line 920: // TODO: in theory we could avoid loading frq block
- line 1470: // TODO: in theory we could avoid loading frq block
- line 1915: // TODO: in theory we could avoid loading frq block
lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java (3 lines):
- line 56: // TODO: if DoubleDocValuesField used
- line 202: return 100; // TODO: use cost of range.accept()
- line 309: return 100; // TODO: use cost of range.accept()
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/recorders/LongAggregationsFacetRecorder.java (3 lines):
- line 37: * TODO: [premature optimization idea] if instead of one array we keep aggregations in two
- line 110: // TODO: do we need empty map by default?
- line 198: // TODO: cache advance/longValue results for current doc? Skipped for now as LongValues
lucene/spatial-extras/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java (3 lines):
- line 65: // TODO do we make this non-volatile since it's merely a heuristic?
- line 99: // TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl.
- line 116: // TODO raise to SpatialStrategy
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/WithinPrefixTreeQuery.java (3 lines):
- line 49: // TODO LUCENE-4869: implement faster algorithm based on filtering out false-positives of a
- line 53: // TODO Could the recursion in allCellsIntersectQuery() be eliminated when non-fuzzy or other
- line 113: // TODO move this generic code elsewhere? Spatial4j?
lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsWriter.java (3 lines):
- line 561: // TODO: wasteful we are counting this (counting # docs
- line 590: // TODO: should we send offsets/payloads to
- line 696: // TODO: add a finish() at least to PushBase? DV too...?
lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java (3 lines):
- line 50: // TODO: can we require fewer args? (same for
- line 189: return 100; // TODO: use cost of range.accept()
- line 296: return 100; // TODO: use cost of range.accept()
lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java (3 lines):
- line 452: // TODO: for better perf (but more RAM used) we
- line 516: // TODO: try to avoid wasteful cases: disable doFixedLengthArcs in that case
- line 1062: // TODO: instead of recording isFinal/output on the
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (3 lines):
- line 45: // TODO:
- line 82: // TODO:
- line 243: // TODO: maybe just stored fields? they compress...
lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (2 lines):
- line 93: // TODO: cut over to BytesRefHash in BufferedDeletes
- line 561: interface FlushNotifications { // TODO maybe we find a better name for this?
lucene/replicator/src/java/org/apache/lucene/replicator/nrt/CopyOneFile.java (2 lines):
- line 50: // TODO: pass correct IOCtx, e.g. seg total size
- line 148: // TODO: rsync will fsync a range of the file; maybe we should do that here for large files in
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/PackedQuadPrefixTree.java (2 lines):
- line 307: // TODO clear last bit without the condition
- line 319: 8)); // TODO remove
lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java (2 lines):
- line 63: // TODO: Add a concurrent version much like ConcurrentSortedSetDocValuesFacetCounts?
- line 404: // TODO: yet another option is to count all segs
lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java (2 lines):
- line 130: // TODO: reuse the overflow instance?
- line 145: // TODO: we could use a priority queue here to make cost O(N * log(num)) instead of O(N *
lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java (2 lines):
- line 243: return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
- line 277: return 100; // TODO: use cost of values.getOrd() and foundOrds.get()
lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TermAutomatonScorer.java (2 lines):
- line 32: // TODO: add two-phase and needsScores support. maybe use conjunctionDISI internally?
- line 373: // TODO: we could probably do better here, e.g. look @ freqs of actual terms involved in this
lucene/core/src/java/org/apache/lucene/search/ControlledRealTimeReopenThread.java (2 lines):
- line 187: // TODO: maybe use private thread ticktock timer, in
- line 194: // TODO: try to guestimate how long reopen might
lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java (2 lines):
- line 329: // TODO: we shouldn't have to do this? Can't we return a merged reader to
- line 515: // TODO: this should optimize the fixed width case as well
lucene/core/src/java/org/apache/lucene/search/ConstantScoreScorer.java (2 lines):
- line 74: // TODO: Only wrap when it is the top-level scoring clause? See
- line 94: // TODO: Only wrap when it is the top-level scoring clause? See
lucene/sandbox/src/java/org/apache/lucene/sandbox/search/SortedSetDocValuesMultiRangeQuery.java (2 lines):
- line 176: // TODO perhaps count() specification?
- line 220: // TODO unwrap singleton?
lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java (2 lines):
- line 74: // TODO should we add .files() to FieldInfosFormat, like we have on
- line 243: // TODO we could rely on TrackingDir.getCreatedFiles() (like we do for
lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java (2 lines):
- line 412: // TODO: maybe use long? But our keys are typically short...
- line 504: // TODO: can't we just merge this w/
lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java (2 lines):
- line 206: // TODO: optimize for single-valued case?
- line 207: // TODO: do all kinds of other optimizations!
lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java (2 lines):
- line 214: // TODO raise to SpatialStrategy
- line 221: // TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl.
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TaxonomyChildrenOrdinalIterator.java (2 lines):
- line 30: // TODO: do we want to have something like ChainOrdinalIterators to chain multiple iterators?
- line 47: // TODO: in some cases it might be faster to traverse children of selected parent
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/TaxonomyFacetsCutter.java (2 lines):
- line 100: // TODO: if multiValued is emptySortedNumeric we can throw CollectionTerminatedException
- line 106: // TODO: does unwrapping Single valued make things any faster? We still need to wrap it into
lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java (2 lines):
- line 90: reason = "TODO: fix code to serialize its own dictionary vs. a binary blob in the codebase")
- line 101: reason = "TODO: fix code to serialize its own dictionary vs. a binary blob in the codebase")
lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java (2 lines):
- line 38: // TODO: maybe move under blocktree? but it's used by other terms dicts (e.g. Block)
- line 40: // TODO: find a better name; this defines the API that the
lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java (2 lines):
- line 597: // TODO: binary search
- line 614: // TODO: binary search
lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/quantization/KMeans.java (2 lines):
- line 238: // TODO: replace with RandomVectorScorer::score possible on quantized vectors
- line 299: // TODO: replace with RandomVectorScorer::score possible on quantized vectors
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java (2 lines):
- line 67: // TODO: gather basic metrics for reporting -- eg mean,
- line 87: // TODO: somehow we need to enable warming, here
lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (2 lines):
- line 464: // TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
- line 554: // TODO: deprecate & move this method into AnalyzerUtil?
lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java (2 lines):
- line 35: // TODO: maybe change this API to allow for re-use of the
- line 92: // TODO: maybe make valid(T output) public...? for asserts
lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java (2 lines):
- line 75: // TODO: if the wrong version of the ICU jar is used, loading these data files may give a strange
- line 86: // TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (2 lines):
- line 38: // TODO: we could assert that this is set-once, ie,
- line 64: // TODO: we could use -1 as default here? Then we can
lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java (2 lines):
- line 235: // TODO ensure that field-sensitivity is preserved ie the query
- line 405: // TODO alternatively could call extract terms here?
lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenateGraphFilter.java (2 lines):
- line 227: // TODO refactor this
- line 246: // TODO: we can optimize this somewhat by determinizing
lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java (2 lines):
- line 41: // TODO: can we somehow compute stats for you...?
- line 43: // TODO: maybe we should factor out "limited" (only
lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (2 lines):
- line 321: // TODO: what happens if in fact a different order is used?
- line 377: // TODO: we should similarly catch an attempt to turn
lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java (2 lines):
- line 121: IOUtils.closeWhileHandlingException(channel); // TODO: addSuppressed
- line 137: // TODO: kind of bogus we even pass channel:
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java (2 lines):
- line 40: * TODO: it's identical to private ExclusiveLongRangeCounter#buildElementaryIntervals, let's
- line 81: * TODO: dedup NonOverlappingLongRangeMultiValueLeafFacetCutter and
lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (2 lines):
- line 291: // TODO: how could we compute a match cost?
- line 434: // TODO: When score mode is None, this check is broken because the child approximation is not
lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java (2 lines):
- line 42: // TODO: we could store exact weights as outputs from the FST (int4 encoded
- line 46: // TODO: support for Analyzers (infix suggestions, synonyms?)
lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (2 lines):
- line 66: // TODO: generalize this query (at least it should not reuse this static sim!
- line 330: // TODO possible alternative step 3 - organize above booleans into a new layer of field-based
lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizer.java (2 lines):
- line 41: // TODO: the matcher here is naive and does have N^2 adversarial cases that are unlikely to arise in
- line 62: // TODO: we could likely use a single rolling buffer instead of two separate char buffers here.
lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java (2 lines):
- line 280: return 100; // TODO: use cost of values.getOrd() and collector.score()
- line 317: return 100; // TODO: use cost.getOrd() of values and collector.score()
lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java (2 lines):
- line 125: // TODO: are lowerVal and upperVal in indexed form or not?
- line 189: // TODO: why?
lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/ViterbiNBest.java (2 lines):
- line 115: // TODO: maybe we do something else here, instead of just
- line 222: // TODO: sort of silly to make Token instances here; the
lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java (2 lines):
- line 143: // TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override
- line 182: return grid.getTreeCellIterator(shape, detailLevel); // TODO should take a re-use iterator
dev-tools/scripts/pyproject.toml (2 lines):
- line 14: # This is a big TODO list of current typing problems
- line 38: # This is a big TODO list of current linter problems.
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java (2 lines):
- line 31: "DATE:", "date:", // TODO improve date extraction for this format
- line 36: // TODO can we also extract title for this format?
lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java (2 lines):
- line 333: // TODO: add finish() to shrink wrap the arrays?
- line 936: // TODO: BitSet RAM usage (isAccept.size()/8) isn't fully accurate...
lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java (2 lines):
- line 84: // TODO it'd be nice if we could get at the underlying Automaton in CharacterRunAutomaton so
- line 125: // it'd be nice to use KeepWordFilter but it demands a CharArraySet. TODO File JIRA? Need a new
lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (2 lines):
- line 28: // TODO: move to core? nobody else uses it yet though...
- line 98: // TODO: if this transition's TO state is accepting, and
lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (2 lines):
- line 81: // TODO: validate this for real, somewhere else
- line 85: // TODO: add sumDocFreq for field (numberOfFieldPostings)
lucene/core/src/java/org/apache/lucene/search/package-info.java (2 lines):
- line 60: *
- line 509: *
lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java (2 lines):
- line 651: // TODO: this is very conservative, could we reuse information for even int4 quantization?
- line 930: // TODO: this is very conservative, could we reuse information for even int4
lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java (2 lines):
- line 154: // TODO: use a more efficient datastructure: automaton?
- line 248: // TODO: this parser is ugly, but works. use a jflex grammar instead.
lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java (2 lines):
- line 120: // TODO: we could get by w/ just a "set"; need to have
- line 143: // TODO: we don't have to use IR.getVersion to track;
lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (2 lines):
- line 121: // TODO: we should probably nuke this and make a more efficient 4.x format
- line 243: // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java (2 lines):
- line 46: // TODO: this is copied from SimplePatternTokenizer, but there are subtle differences e.g. we
- line 50: // TODO: we could likely use a single rolling buffer instead of two separate char buffers here.
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/iterators/TopnOrdinalIterator.java (2 lines):
- line 49: // TODO: current taxonomy implementations limit queue size by taxo reader size too, but it
- line 52: // TODO: create queue lazily - skip if first nextOrd is NO_MORE_ORDS ?
lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/labels/OrdToLabel.java (2 lines):
- line 25: * TODO: move FacetLabel out of taxonomy folder to use it for any facets, not just taxonomy?
    TODO: move FacetLabel out of taxonomy folder to use it for any facets, not just taxonomy? - line 30: /** get label of one ord TODO: what do we return when ordinal is not valid? null? */ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ToStringUtil.java (2 lines): - line 252: // TODO: now that this is used by readingsfilter and not just for - line 914: // TODO: investigate all this lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (2 lines): - line 123: // TODO: support embedding; I think it should work but - line 216: // TODO: This should check current format from the field attribute? lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/labels/LabelToOrd.java (2 lines): - line 25: *

    TODO: move FacetLabel out of taxonomy folder to use it for any facets, not just taxonomy? - line 27: *

    TODO: there is some overlap with {@link lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/FacetFieldLeafCollector.java (2 lines): - line 50: // TODO: see comment in FacetFieldCollector#scoreMode - line 71: // TODO: any ideas? lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java (2 lines): - line 29: // TODO: try to clean up close? no-op? - line 30: // TODO: add shared base class (also used by per-field-pf?) to allow "punching thru" to low level lucene/core/src/java/org/apache/lucene/index/PointValues.java (2 lines): - line 368: // TODO: we can assert that the first value here in fact matches what the pointTree - line 412: *

    TODO: will broad-first help estimation terminate earlier? lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/QueryNodeImpl.java (2 lines): - line 34: // TODO remove PLAINTEXT_FIELD_NAME replacing it with configuration APIs - line 201: // TODO: remove this method, it's commonly used by {@link lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/DoubleRangeFacetCutter.java (2 lines): - line 61: // TODO: instead of relying on either single value source or multi value source to be null, we - line 74: // TODO: it is exactly the same as DoubleRangeFacetCounts#getLongRanges (protected), we should lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinitionWriter.java (2 lines): - line 40: /** Constructor for building. TODO: remove write access */ - line 76: // TODO: length def ignored lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java (2 lines): - line 150: reason = "TODO: fix code to serialize its own dictionary vs. a binary blob in the codebase") - line 163: reason = "TODO: fix code to serialize its own dictionary vs. a binary blob in the codebase") lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/BaseFacetBuilder.java (2 lines): - line 90: // TODO: support other aggregations - line 100: // TODO: add support for other aggregation types, e.g. float/int associations lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/blocktree/FieldReader.java (2 lines): - line 104: // TODO: this branch is never taken - line 217: // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? lucene/core/src/java/org/apache/lucene/index/Terms.java (2 lines): - line 67: // TODO: could we factor out a common interface b/w - line 73: // TODO: eventually we could support seekCeil/Exact on lucene/sandbox/src/java/org/apache/lucene/sandbox/search/TokenStreamToTermAutomatonQuery.java (2 lines): - line 106: // TODO: look at endOffset? ts2a did... - line 108: // TODO: this (setting "last" state as the only accept state) may be too simplistic? lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/LruTaxonomyWriterCache.java (2 lines): - line 49: // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU. - line 60: // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU. 
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (2 lines): - line 86: // TODO: test DoubleBarrelLRUCache and consider using it instead - line 474: // TODO LUCENE-10068: can we use an int-based hash impl, such as IntToObjectMap, lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java (2 lines): - line 43: // TODO change the way DrillSidewaysScorer is used, this query does not work - line 218: // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java (2 lines): - line 545: // TODO: binary search - line 561: // TODO: binary search lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java (2 lines): - line 144: // TODO: in the future, we should separate "flush" (returns an incRef'd SegmentInfos) from - line 222: // TODO (opto): it's a bit wasteful that we put "last refresh" version here, not the actual lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (1 line): - line 183: // TODO: refactor to a shared readFully somewhere lucene/core/src/java/org/apache/lucene/util/hnsw/MergingHnswGraphBuilder.java (1 line): - line 129: // TODO: optimize to iterate only over unset bits in initializedNodes lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java (1 line): - line 115: // TODO: shouldn't this really be an assert instead? but then this real "if" triggered lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (1 line): - line 234: // TODO: we should only provide the "slice" of FIS lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (1 line): - line 163: // TODO: move this check somewhere else, and impl the other missing ones lucene/core/src/java/org/apache/lucene/util/BytesRefArray.java (1 line): - line 52: // TODO: it's trappy that this does not return storage held by int[] offsets array! lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (1 line): - line 168: tryApplyGlobalSlice(); // TODO doing this each time is not necessary maybe lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java (1 line): - line 202: // TODO: can we somehow use IOUtils here...? problem is lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java (1 line): - line 44: // TODO: Is this the right behavior or should we return false? Currently, " ", returns true, lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (1 line): - line 39: // TODO: update BTR to nuke this lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SpanNearClauseFactory.java (1 line): - line 67: public class SpanNearClauseFactory { // FIXME: rename to SpanClauseFactory lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (1 line): - line 117: // TODO: we could even remove some scorers from the priority queue? lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DefFunction.java (1 line): - line 123: // TODO: need ValueSource.type() to determine correct type lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (1 line): - line 224: // TODO: are we using the best sharing options? 
lucene/classification/src/java/org/apache/lucene/classification/utils/NearestFuzzyQuery.java (1 line): - line 258: // TODO possible alternative step 3 - organize above booleans into a new layer of field-based lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java (1 line): - line 69: // TODO: if we want to stream, maybe we should use jdk stream class? lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (1 line): - line 255: // TODO: move this check to createWeight to happen earlier to the user? lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (1 line): - line 500: // TODO: improve this to return more detailed info? lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/FacetCutter.java (1 line): - line 26: *

    TODO: do we need FacetCutterManager similar to CollectorManager, e.g. is createLeafCutter lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsProducer.java (1 line): - line 143: // TODO: checkIntegrity should checksum the dictionary and index lucene/core/src/java/org/apache/lucene/document/Field.java (1 line): - line 228: // TODO: allow direct construction of int, long, float, double value too..? lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java (1 line): - line 36: // TODO: find a better name; this defines the API that the lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (1 line): - line 480: // TODO: this is sneaky, should we do this, or change TestIWExceptions? rollback lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java (1 line): - line 81: // TODO: should we allow multiple segmentations per input 'phrase'? lucene/core/src/java/org/apache/lucene/search/AbstractMultiTermQueryConstantScoreWrapper.java (1 line): - line 238: // TODO: Instead of replicating the cost logic of a BooleanQuery we could consider rewriting lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/JaMorphData.java (1 line): - line 70: // TODO: maybe we should have a optimal method, a non-typesafe lucene/core/src/java/org/apache/lucene/index/DocValuesSkipIndexType.java (1 line): - line 43: // TODO: add support for pre-aggregated integer/float/double dev-tools/scripts/buildAndPushRelease.py (1 line): - line 371: # TODO: Should we support Cygwin? lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/BoostingTermBuilder.java (1 line): - line 37: // TODO make function and decoder pluggable somehow? lucene/queries/src/java/org/apache/lucene/queries/spans/SpanNotQuery.java (1 line): - line 174: // TODO: this logic is ugly and sneaky, can we clean it up? lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PassageScorer.java (1 line): - line 33: // TODO: this formula is completely made up. It might not provide relevant snippets! lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/DrillSidewaysFacetOrchestrator.java (1 line): - line 47: // TODO: this looks fragile as it duplicates index assignment logic from DrillDownQuery. lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/recorders/LeafFacetRecorder.java (1 line): - line 29: * TODO: Rename: collect? accumulate? lucene/replicator/src/java/org/apache/lucene/replicator/nrt/ReplicaFileDeleter.java (1 line): - line 51: // TODO: this local IR could incRef files here, like we do now with IW's NRT readers ... then we lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java (1 line): - line 39: // TODO: one optimization we could do is to pre-fill lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java (1 line): - line 82: // TODO: a more efficient approach would be Aho/Corasick's lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (1 line): - line 104: // TODO: since we don't write any norms file if there are no norms, lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java (1 line): - line 99: // TODO implement? 
lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxOverlapRatioValueSource.java (1 line): - line 81: // TODO option to compute geodetic area lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java (1 line): - line 109: // TODO: we need types of trueSource / falseSource to handle this lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java (1 line): - line 345: // TODO: Can we read docIDs from disk directly instead of loading giant arrays in memory? lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/DistanceRewriteQuery.java (1 line): - line 37: // TODO implement this lucene/core/src/java/org/apache/lucene/search/SortedNumericSelector.java (1 line): - line 40: // TODO: we could do MEDIAN in constant time (at most 2 lookups) lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (1 line): - line 137: // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?! lucene/core/src/java/org/apache/lucene/search/ExactPhraseMatcher.java (1 line): - line 71: // TODO: only do this when this is the top-level scoring clause lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java (1 line): - line 37: // TODO: find a better name; this defines the API that the lucene/replicator/src/java/org/apache/lucene/replicator/nrt/ReplicaNode.java (1 line): - line 632: // TODO: we could maybe transferAndCancel here? except CopyJob can't transferAndCancel lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (1 line): - line 128: // TODO: the optimized cases (jdk methods) will already do such checks, maybe re-organize this? lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionPostingsWriter.java (1 line): - line 88: // TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on lucene/queries/src/java/org/apache/lucene/queries/intervals/TermIntervalsSource.java (1 line): - line 288: // TODO: When intervals move to core, refactor to use the copy of this in PhraseQuery lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/AnalysisOffsetStrategy.java (1 line): - line 81: // TODO we could make this go away. MemoryIndexOffsetStrategy could simply split and analyze each lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (1 line): - line 70: // TODO: Review if this comparator is really needed for TST to work correctly!!! lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/FacetFieldCollector.java (1 line): - line 50: // TODO: Some FacetRecorders might need scores, e.g. to get associated numeric values, see for lucene/core/src/java/org/apache/lucene/util/mutable/MutableValueInt.java (1 line): - line 71: // TODO: if used in HashMap, it already mixes the value... maybe use a straight value? lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (1 line): - line 47: // TODO: hmm can we avoid this "extra" lookup?: lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java (1 line): - line 54: // TODO: maybe move to UnicodeUtil? 
lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java (1 line): - line 481: // TODO: perhaps we can move it out to an utility class (as a supplier of preconfigured lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SimpleTermRewriteQuery.java (1 line): - line 58: // TODO: implement this lucene/spatial-extras/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (1 line): - line 44: // TODO rename to SpatialPredicate. Use enum? LUCENE-5771 lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (1 line): - line 57: * TODO maybe it would make sense to abstract this even further and allow to lucene/analysis/common/src/java/org/apache/lucene/analysis/core/FlattenGraphFilter.java (1 line): - line 186: // TODO: we could shrink the frontier here somewhat if we lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java (1 line): - line 568: // TODO: should we impl this? lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java (1 line): - line 261: // TODO: should we impl this? lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java (1 line): - line 161: // TODO consider a variable-level structure -- more general purpose. lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java (1 line): - line 238: // TODO: paranoia? if we backtrack thru an infinite DFA, the loop detection is important! lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java (1 line): - line 34: // TODO: this is very minimal. If we need more methods, lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java (1 line): - line 34: // TODO: we could take param to specify locale... lucene/core/src/java/org/apache/lucene/search/QueryCachingPolicy.java (1 line): - line 30: // TODO: add APIs for integration with IndexWriter.IndexReaderWarmer lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java (1 line): - line 194: // TODO: If getTopDims becomes a common use-case, we could consider always indexing dim lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java (1 line): - line 35: // TODO keep this for now lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java (1 line): - line 142: // TODO: can this same OOM happen in THPF? lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (1 line): - line 256: // TODO: better to use condition to notify lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/core/nodes/QueryNode.java (1 line): - line 27: // TODO: this interface might be changed in the future lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java (1 line): - line 147: // TODO: can we avoid new BR here? lucene/core/src/java/org/apache/lucene/util/ResourceLoader.java (1 line): - line 32: // TODO: fix exception handling lucene/core/src/java/org/apache/lucene/index/SortingTermVectorsConsumer.java (1 line): - line 122: // count manually! TODO: Maybe enforce that Fields.size() returns something valid? lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java (1 line): - line 59: // TODO: ByteBlockPool assume the blockSize is always {@link BYTE_BLOCK_SIZE}, but this class lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/TokenInfoFST.java (1 line): - line 58: // TODO: jump to cacheFloor, readNextRealArc to ceiling? 
(just be careful we don't add bugs) lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java (1 line): - line 451: // TODO: what is the right factor compared to the current disi? Is 8 optimal? lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (1 line): - line 28: // TODO: maybe also toFST? then we can translate atts into FST outputs/weights lucene/facet/src/java/org/apache/lucene/facet/FacetsCollectorManager.java (1 line): - line 283: // TODO: if we fix type safety of TopFieldDocs we can lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (1 line): - line 464: // TODO: Validate? lucene/core/src/java/org/apache/lucene/util/StableStringSorter.java (1 line): - line 67: // TODO: Maybe tim sort is better? lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50CompoundReader.java (1 line): - line 59: // TODO: we should just pre-strip "entries" and append segment name up-front like simpletext? lucene/core/src/java/org/apache/lucene/document/TextField.java (1 line): - line 46: // TODO: add sugar for term vectors...? lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (1 line): - line 379: // TODO Some day we should make the query cache in this module configurable and control lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/TreeCellIterator.java (1 line): - line 88: // TODO implement a smart nextFrom() that looks at the parent's bytes first lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterFactory.java (1 line): - line 160: // TODO: expose dedup as a parameter? lucene/analysis/kuromoji/src/java/overview.html (1 line): - line 32: