mirror of
https://github.com/chylex/Brotli-Builder.git
synced 2024-11-25 07:42:56 +01:00
99 lines
4.5 KiB
C#
99 lines
4.5 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.Linq;
|
|
using BrotliLib.Brotli.Dictionary.Format;
|
|
using BrotliLib.Brotli.Dictionary.Transform;
|
|
using BrotliLib.Collections;
|
|
|
|
namespace BrotliLib.Brotli.Dictionary.Index{
|
|
public sealed class BrotliDictionaryIndex{
|
|
private readonly IDictionaryFormat format;
|
|
private readonly IReadOnlyList<WordTransform> transforms;
|
|
private readonly Dictionary<TransformType, PatriciaTree<(int, int)>> lookups;
|
|
|
|
private readonly List<int> transformsNoPrefix;
|
|
private readonly List<int> transformsWithPrefix;
|
|
|
|
internal BrotliDictionaryIndex(BrotliDictionary dictionary){
|
|
this.format = dictionary.Format;
|
|
this.transforms = dictionary.Transforms;
|
|
|
|
Stopwatch sw = Stopwatch.StartNew();
|
|
|
|
this.lookups = TransformTypes.All.ToDictionary(type => type, type => {
|
|
var tree = new PatriciaTree<(int, int)>();
|
|
|
|
foreach(var length in format.WordLengths.Where(length => type.GetTransformedLength(length) >= 1)){
|
|
for(int word = 0, count = format.WordCount(length); word < count; word++){
|
|
byte[] bytes = type.Process(dictionary.ReadRaw(length, word));
|
|
tree.Insert(bytes, (length, word));
|
|
}
|
|
}
|
|
|
|
return tree;
|
|
});
|
|
|
|
this.transformsNoPrefix = Enumerable.Range(0, transforms.Count).Where(index => transforms[index].Prefix.Length == 0).ToList();
|
|
this.transformsWithPrefix = Enumerable.Range(0, transforms.Count).Where(index => transforms[index].Prefix.Length > 0).ToList();
|
|
|
|
sw.Stop();
|
|
Debug.WriteLine("Constructed dictionary index in " + sw.ElapsedMilliseconds + " ms.");
|
|
}
|
|
|
|
/// <summary>
|
|
/// Finds all <see cref="DictionaryIndexEntry"/> that match prefixes between <paramref name="minLength"/> and <paramref name="maxLength"/> long in the input <paramref name="bytes"/>.
|
|
/// The order of results is undefined.
|
|
/// </summary>
|
|
public List<DictionaryIndexEntry> Find(ArraySegment<byte> bytes, int minLength = 1, int maxLength = int.MaxValue){
|
|
var entries = new List<DictionaryIndexEntry>();
|
|
|
|
// default dictionary guarantees executing 44 identity, 12 ferment first, 12 ferment all transformations
|
|
var identityNoPrefixWords = lookups[TransformType.Identity].FindAll(bytes, minLength);
|
|
var fermentFirstNoPrefixWords = lookups[TransformType.FermentFirst].FindAll(bytes, minLength);
|
|
var fermentAllNoPrefixWords = lookups[TransformType.FermentAll].FindAll(bytes, minLength);
|
|
|
|
IEnumerable<(int, int)> LookupWords(TransformType type, int prefixLength){
|
|
if (prefixLength == 0){
|
|
switch(type){
|
|
case TransformType.Identity: return identityNoPrefixWords;
|
|
case TransformType.FermentFirst: return fermentFirstNoPrefixWords;
|
|
case TransformType.FermentAll: return fermentAllNoPrefixWords;
|
|
}
|
|
}
|
|
|
|
if (prefixLength <= bytes.Count){
|
|
return lookups[type].FindAll(bytes.Slice(prefixLength), Math.Max(1, minLength - prefixLength));
|
|
}
|
|
else{
|
|
return Enumerable.Empty<(int, int)>();
|
|
}
|
|
}
|
|
|
|
var transformsMatchingPrefix = transformsNoPrefix.Concat(transformsWithPrefix.Where(index => CollectionHelper.ContainsAt(bytes, 0, transforms[index].Prefix)));
|
|
|
|
foreach(var transform in transformsMatchingPrefix){
|
|
var wt = transforms[transform];
|
|
|
|
var type = wt.Type;
|
|
var prefix = wt.Prefix;
|
|
var suffix = wt.Suffix;
|
|
|
|
int prefixLength = prefix.Length;
|
|
int suffixLength = suffix.Length;
|
|
|
|
foreach(var (length, word) in LookupWords(type, prefixLength)){
|
|
int transformedLength = type.GetTransformedLength(length);
|
|
int outputLength = transformedLength + prefixLength + suffixLength;
|
|
|
|
if (outputLength >= minLength && outputLength <= maxLength && CollectionHelper.ContainsAt(bytes, prefixLength + transformedLength, suffix)){
|
|
entries.Add(new DictionaryIndexEntry(format.GetPackedValue(length, word, transform), length, outputLength));
|
|
}
|
|
}
|
|
}
|
|
|
|
return entries;
|
|
}
|
|
}
|
|
}
|