# HG changeset patch
# User David Scott <dave.scott@xxxxxxxxxxxxx>
# Date 1279751378 -3600
# Node ID d052a41ffabe74f791a1d62bd178a1cac13dc332
# Parent 542efa53c3e25342a175ead7f9327b972f976820
Add functions Zerocheck.get_next{zero,nonzero} which can be used to detect
sparseness (relatively efficiently). Both functions primarily use aligned
unsigned int-sized accesses.
Signed-off-by: David Scott <dave.scott@xxxxxxxxxxxxx>
diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck.ml
--- a/stdext/zerocheck.ml Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck.ml Wed Jul 21 23:29:38 2010 +0100
@@ -12,3 +12,30 @@
* GNU Lesser General Public License for more details.
*)
external is_all_zeros : string -> int -> bool = "is_all_zeros"
+
+external _find_a_nonzero : string -> int -> int -> int = "find_a_nonzero"
+external _find_a_zero : string -> int -> int -> int = "find_a_zero"
+
+let wrap f x len offset =
+ let remaining = len - offset in
+ if remaining <= 0 then raise (Invalid_argument "offset > length");
+ let result = f x offset remaining in
+ if result = remaining then None else Some (result + offset)
+
+let find_a_nonzero = wrap _find_a_nonzero
+let find_a_zero = wrap _find_a_zero
+
+let fold_over_nonzeros x len roundup f initial =
+ let rec inner acc offset =
+ if offset = len then acc
+ else
+ match find_a_nonzero x len offset with
+ | None -> acc (* no more *)
+ | Some s ->
+ let e = match find_a_zero x len s with
+ | None -> len
+ | Some e -> e in
+ let e = min len (roundup e) in
+ inner (f acc (s, e - s)) e in
+ inner initial 0
+
diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck.mli
--- a/stdext/zerocheck.mli Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck.mli Wed Jul 21 23:29:38 2010 +0100
@@ -11,4 +11,24 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)
+
+(** [is_all_zeroes x len] returns true if the substring is all zeroes *)
external is_all_zeros : string -> int -> bool = "is_all_zeros"
+
+(** [find_a_zero x len offset] returns the offset in [x] of a zero
+ character after [offset], or None if no zero was detected.
+ Note this function is approximate and is not guaranteed to find
+ strictly the first zero. *)
+val find_a_zero: string -> int -> int -> int option
+
+(** [find_a_nonzero x len offset] returns the offset in [x] of a
+ nonzero character after [offset], or None if none could be detected.
+ Note this function is approximate and is not guaranteed to find
+ strictly the first nonzero. *)
+val find_a_nonzero: string -> int -> int -> int option
+
+(** [fold_over_nonzeros buf len roundup f initial] folds [f] over all
+ (start, length) pairs of non-zero data in string [buf] up to [len].
+ The end offset of each pair is rounded up with [roundup] (e.g. to
+ potential block boudaries. *)
+val fold_over_nonzeros: string -> int -> (int -> int) -> ('a -> int * int ->
'a) -> 'a -> 'a
diff -r 542efa53c3e2 -r d052a41ffabe stdext/zerocheck_stub.c
--- a/stdext/zerocheck_stub.c Wed Jul 21 23:29:15 2010 +0100
+++ b/stdext/zerocheck_stub.c Wed Jul 21 23:29:38 2010 +0100
@@ -18,6 +18,88 @@
#include <caml/signals.h>
#include <caml/fail.h>
+#define OFFSET(s) (((unsigned int)s) & (sizeof(unsigned int) - 1))
+
+
+/* Return the offset of the next non-zero byte (possibly rounded down a bit).
+ The value 'remaining' is returned if there is no non-zero byte found. */
+value find_a_nonzero(value string, value offset, value remaining)
+{
+ CAMLparam3(string, offset, remaining);
+ int c_offset = Int_val(offset);
+ int c_remaining = Int_val(remaining);
+ int c_origremaining = c_remaining;
+ char *c_string = String_val(string);
+ char *s = c_string + c_offset;
+
+ /* Go character by character until we hit an unsigned int boundary */
+ while ((OFFSET(s) != 0) && (c_remaining > 0)){
+ if (*s != '\000') goto finish;
+ s++; c_remaining--;
+ }
+ /* Go word by word. Note we don't need to determine the exact position
+ of the nonzero, it suffices to return the index of the word
containing
+ the nonzero. */
+ unsigned int *p = (unsigned int *)s;
+ while (c_remaining > 4){
+ if (*p != 0) goto finish;
+ p++; c_remaining-=4;
+ }
+ /* Go character by character until the end of the string */
+ s = (char*) p;
+ while (c_remaining > 0){
+ if (*s != '\000') goto finish;
+ s++; c_remaining--;
+ }
+ /* c_remaining == 0 */
+ finish:
+ /* If we didn't find a nonzero then we return c_origremaining.
+ If we did then we return the number of chars after the starting
+ offset where the word containing the nonzero was detected. */
+ CAMLreturn(Val_int(c_origremaining - c_remaining));
+
+}
+
+/* Return the offset of the next zero byte (possibly rounded up a bit).
+ The value 'remaining' is returned if there is no zero byte found. */
+value find_a_zero(value string, value offset, value remaining)
+{
+ CAMLparam3(string, offset, remaining);
+ int c_offset = Int_val(offset);
+ int c_remaining = Int_val(remaining);
+ int c_origremaining = c_remaining;
+ char *c_string = String_val(string);
+ char *s = c_string + c_offset;
+
+ /* Go character by character until we hit an unsigned int boundary */
+ while ((OFFSET(s) != 0) && (c_remaining > 0)){
+ if (*s == '\000') goto finish;
+ s++; c_remaining--;
+ }
+ /* Go word by word. Note we don't need to determine the exact position
+ of the zero, it suffices to return the index of the word following
+ the zero. */
+ unsigned int *p = (unsigned int *)s;
+ while (c_remaining > 4){
+ if (*p == 0) goto finish;
+ p++; c_remaining-=4;
+ }
+ /* Go character by character until the end of the string */
+ s = (char*) p;
+ while (c_remaining > 0){
+ if (*s == '\000') goto finish;
+ s++; c_remaining--;
+ }
+ /* c_remaining == 0 */
+ finish:
+ /* If we didn't find a zero then we return c_origremaining.
+ If we did then we return the number of chars after the starting
+ offset where the word containing the zero was detected. */
+ CAMLreturn(Val_int(c_origremaining - c_remaining));
+}
+
+
+
/* for better performance in all case, we should process the unalign data at
* the beginning until we reach a 32 bit align value, however since ocaml
* allocate the string and we don't use any offset in this string, the string
stdext/zerocheck.ml | 27 ++++++++++++++++
stdext/zerocheck.mli | 20 +++++++++++
stdext/zerocheck_stub.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 129 insertions(+), 0 deletions(-)
xen-api-libs.hg.patch
Description: Text Data
_______________________________________________
xen-api mailing list
xen-api@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/mailman/listinfo/xen-api
|