Sample trim_scale UDF for older versions of PostgreSQL
May 19, 2025 ยท View on GitHub
In some circumstances DVT uses the PostgreSQL built in trim_scale() function to format decimals in a way compatible with other SQL engines. trim_scale() was introduced in PostgreSQL 13 therefore it is possible to encounter the following error when running DVT on older versions of PostgreSQL:
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.UndefinedFunction) function trim_scale(numeric) does not exist
To workaround this a custom UDF can be created, as described in this Stack Overflow post. An example can be found here.
Correctness test
Below I created the trim_scale UDF in a system with PostgreSQL's internal trim_scale function to compare results:
CREATE FUNCTION dvt_test.trim_scale(numeric)
RETURNS numeric
AS $$
SELECT CASE
WHEN trim(\$1::text, '0') IN ('.', '') THEN 0
WHEN trim(\$1::text, '0')::numeric = \$1 THEN trim(\$1::text, '0')::numeric
ELSE \$1 END
$$ LANGUAGE SQL;
SELECT c, trim_scale(c) pg_trim_scale, dvt_test.trim_scale(c) trim_scale_udf
FROM (
SELECT UNNEST(ARRAY[
NULL::decimal,
0::decimal,
1::decimal,
-1::decimal,
0.0::decimal,
-0.0::decimal,
0.0001::decimal,
0.100100::decimal,
0.1230000::decimal,
0.01234567890::decimal,
-0.01234567890::decimal,
12345678901234567890::decimal
]) AS c
) r;
c | pg_trim_scale | trim_scale_udf
----------------------+----------------------+----------------------
| |
0 | 0 | 0
1 | 1 | 1
-1 | -1 | -1
0.0 | 0 | 0
0.0 | 0 | 0
0.0001 | 0.0001 | 0.0001
0.100100 | 0.1001 | 0.1001
0.1230000 | 0.123 | 0.123
0.01234567890 | 0.0123456789 | 0.0123456789
-0.01234567890 | -0.0123456789 | -0.0123456789
12345678901234567890 | 12345678901234567890 | 12345678901234567890
Performance test
This basic performance test generates a table with 5 million rows each containing 10 decimal columns. This step is executed five times so an average elapsed time can be obtained at the end.
A new table is created with the same approach but applying the built in trim_scale() function and afterwards an other table is also created in the same way but using the proposed DVT time_scale() UDF. This gives us the overhead for each technique.
Baseline
CREATE TABLE trim_scale_tab
AS
SELECT (i::decimal) c1
, (i+1::decimal) c2
, (i+2::decimal) c3
, (i+3::decimal) c4
, (i+4::decimal) c5
, (i+5::decimal) c6
, (i+6::decimal) c7
, (i+7::decimal) c8
, (i+8::decimal) c9
, (i+9::decimal) c10
FROM generate_series(1,5000000) AS t(i);
Time: 12268.008 ms (00:12.268)
Time: 12113.171 ms (00:12.113)
Time: 12875.416 ms (00:12.875)
Time: 13020.033 ms (00:13.020)
Time: 12079.489 ms (00:12.079)
Average: 12471ms
Built in function
CREATE TABLE trim_scale_tab
AS
SELECT trim_scale(i::decimal) c1
, trim_scale(i+1::decimal) c2
, trim_scale(i+2::decimal) c3
, trim_scale(i+3::decimal) c4
, trim_scale(i+4::decimal) c5
, trim_scale(i+5::decimal) c6
, trim_scale(i+6::decimal) c7
, trim_scale(i+7::decimal) c8
, trim_scale(i+8::decimal) c9
, trim_scale(i+9::decimal) c10
FROM generate_series(1,5000000) AS t(i);
Time: 19397.326 ms (00:19.397)
Time: 14243.871 ms (00:14.244)
Time: 14296.588 ms (00:14.297)
Time: 13895.630 ms (00:13.896)
Time: 14107.917 ms (00:14.108)
Average: 15188ms Delta: +2717ms (+22%)
Custom UDF
CREATE TABLE trim_scale_tab
AS
SELECT dvt_test.trim_scale(i::decimal) c1
, dvt_test.trim_scale(i+1::decimal) c2
, dvt_test.trim_scale(i+2::decimal) c3
, dvt_test.trim_scale(i+3::decimal) c4
, dvt_test.trim_scale(i+4::decimal) c5
, dvt_test.trim_scale(i+5::decimal) c6
, dvt_test.trim_scale(i+6::decimal) c7
, dvt_test.trim_scale(i+7::decimal) c8
, dvt_test.trim_scale(i+8::decimal) c9
, dvt_test.trim_scale(i+9::decimal) c10
FROM generate_series(1,5000000) AS t(i);
Time: 92619.294 ms (01:32.619)
Time: 66356.498 ms (01:06.356)
Time: 61986.184 ms (01:01.986)
Time: 61201.870 ms (01:01.202)
Time: 61640.150 ms (01:01.640)
Average: 68761ms Delta: +56290ms (+450%)
Inline CASE expression
CREATE TABLE trim_scale_tab
AS
SELECT CASE
WHEN trim(i::text, '0') IN ('.', '') THEN 0
WHEN trim(i::text, '0')::numeric = i THEN trim(i::text, '0')::numeric
ELSE i END c1,
CASE
WHEN trim((i+1)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+1)::text, '0')::numeric = i+1 THEN trim((i+1)::text, '0')::numeric
ELSE i+1 END c2,
CASE
WHEN trim((i+2)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+2)::text, '0')::numeric = i+2 THEN trim((i+2)::text, '0')::numeric
ELSE i+2 END c3,
CASE
WHEN trim((i+3)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+3)::text, '0')::numeric = i+3 THEN trim((i+3)::text, '0')::numeric
ELSE i+3 END c4,
CASE
WHEN trim((i+4)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+4)::text, '0')::numeric = i+4 THEN trim((i+4)::text, '0')::numeric
ELSE i+4 END c5,
CASE
WHEN trim((i+5)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+5)::text, '0')::numeric = i+5 THEN trim((i+5)::text, '0')::numeric
ELSE i+5 END c6,
CASE
WHEN trim((i+6)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+6)::text, '0')::numeric = i+6 THEN trim((i+6)::text, '0')::numeric
ELSE i+6 END c7,
CASE
WHEN trim((i+7)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+7)::text, '0')::numeric = i+7 THEN trim((i+7)::text, '0')::numeric
ELSE i+7 END c8,
CASE
WHEN trim((i+8)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+8)::text, '0')::numeric = i+8 THEN trim((i+8)::text, '0')::numeric
ELSE i+8 END c9,
CASE
WHEN trim((i+9)::text, '0') IN ('.', '') THEN 0
WHEN trim((i+9)::text, '0')::numeric = i+9 THEN trim((i+9)::text, '0')::numeric
ELSE i+9 END c10
FROM generate_series(1,5000000) AS t(i);
Time: 51362.997 ms (00:51.363)
Time: 44526.158 ms (00:44.526)
Time: 43060.392 ms (00:43.060)
Time: 42366.208 ms (00:42.366)
Time: 61329.184 ms (01:01.329)
Average: 48529ms Delta: +36058ms (+289%)