regex-comparison

December 24, 2020 · View on GitHub

An extremely basic comparison of regex performance in different languages. The regular expression '\d+' was run on 100 lines of text (each containing text and 4 numbers), and this was repeated 100.000 times. Every number found was added to a running total across all iterations, to validate the result. These results show the elapsed time for just a single regular expression example, so please do not take the numbers as a benchmark of the different languages/options.

| option | time | version | mem. bytes (max. resident set size) | mem. bytes (peak memory footprint) | bin. size | remarks |
| --- | --- | --- | --- | --- | --- | --- |
| PyPy | 6.42s | 7.3.1 with GCC | 41.349.120 | 34.197.504 | - | |
| V (pcre) | 7.72s | 0.2 (f7787ff) | 1.301.581.824 | 1.300.992.000 | 35KB | compiled with -prod |
| Crystal | 8.77s | 0.35.1 (LLVM 9.0.1) | 3.235.840 | 2.342.912 | 403KB | compiled with --release |
| Go | 11.80s | 1.15.5 | 10.051.584 | 7.196.672 | 2.4MB | |
| V (pcre) AF | 13.52s | 0.2 (f7787ff) | 3.577.479.168 | 3.576.889.344 | 35KB | compiled with -prod -autofree |
| V (regex) | 17.28s | 0.2 (f7787ff) | 2.845.835.264 | 2.845.364.224 | 65KB | compiled with -prod |
| V (regex) AF | 18.31s | 0.2 (f7787ff) | 1.301.434.368 | 1.300.963.328 | 65KB | compiled with -prod -autofree |
| Python | 28.72s | 3.8.2 | 6.991.872 | 4.620.288 | - | |

V (regex) code used

import regex

// inputs is the benchmark corpus: 100 lines of text, each holding four
// numbers. It is built from a 20-line block of distinct rules that is
// repeated five times, in order.
const (
	inputs = [
		'departure location: 43-237 or 251-961',
		'departure station: 27-579 or 586-953',
		'departure platform: 31-587 or 608-967',
		'departure track: 26-773 or 784-973',
		'departure date: 41-532 or 552-956',
		'departure time: 33-322 or 333-972',
		'arrival location: 30-165 or 178-965',
		'arrival station: 31-565 or 571-968',
		'arrival platform: 36-453 or 473-963',
		'arrival track: 35-912 or 924-951',
		'class: 39-376 or 396-968',
		'duration: 31-686 or 697-974',
		'price: 28-78 or 96-971',
		'route: 32-929 or 943-955',
		'row: 40-885 or 896-968',
		'seat: 26-744 or 765-967',
		'train: 46-721 or 741-969',
		'type: 30-626 or 641-965',
		'wagon: 48-488 or 513-971',
		'zone: 34-354 or 361-973',
	].repeat(5)
)

// main compiles the `\d+` pattern once, scans every line of `inputs`
// 100000 times, and prints the sum of every number matched along the way.
fn main() {
	mut digits := regex.regex_opt(r'\d+') or { panic(err) }
	mut sum := u64(0)
	for _ in 0 .. 100000 {
		for line in inputs {
			// The result is consumed as consecutive (start, end) offset pairs.
			bounds := digits.find_all(line)
			mut pos := 0
			for pos < bounds.len {
				from := bounds[pos]
				to := bounds[pos + 1]
				sum += line[from..to].u64()
				pos += 2
			}
		}
	}
	println(sum)
}
// 21159000000
// ./regex_digit  20.68s user 0.91s system 99% cpu 21.759 total