| 1 | #!/usr/bin/perl |
|---|
| 2 | |
|---|
| 3 | # Simple perl script to take portable SQL |
|---|
| 4 | # and generate database specific DDL |
|---|
| 5 | |
|---|
| 6 | use strict; |
|---|
| 7 | |
|---|
| 8 | sub usage { |
|---|
| 9 | return <<EOF; |
|---|
| 10 | $0: Usage: |
|---|
| 11 | |
|---|
| 12 | $0 DBNAME [[DESTINATION] SOURCEFILE] |
|---|
| 13 | |
|---|
| 14 | where DBNAME is DB2, postgres, postgres_preSERIAL, sqlite, oracle or mysql |
|---|
| 15 | EOF |
|---|
| 16 | } |
|---|
| 17 | |
|---|
| 18 | my $dbtarget = shift || die usage(); |
|---|
| 19 | $dbtarget =~ m/^(db2|oracle|sqlite|postgres|postgres_preSERIAL|mysql)$/ || die usage(); |
|---|
| 20 | $dbtarget = lc($dbtarget); |
|---|
| 21 | |
|---|
| 22 | my $inputfile = shift; |
|---|
| 23 | |
|---|
| 24 | my $outputfile = shift; |
|---|
| 25 | if ($outputfile) { |
|---|
| 26 | open(OUTPUT, ">$outputfile") || die "Cannot open '$outputfile': $!\n"; |
|---|
| 27 | select(OUTPUT); |
|---|
| 28 | } |
|---|
| 29 | |
|---|
| 30 | # put inputfile back in ARGV so we can just use <> |
|---|
| 31 | unshift(@ARGV, $inputfile); |
|---|
| 32 | |
|---|
| 33 | |
|---|
| 34 | sub to_sqlite { |
|---|
| 35 | s/BOOLEAN/SMALLINT/g; |
|---|
| 36 | s/TINYINT/SMALLINT/g; |
|---|
| 37 | s/MEDIUMTEXT/TEXT/g; |
|---|
| 38 | s/SERIAL//g; |
|---|
| 39 | } |
|---|
| 40 | |
|---|
| 41 | sub to_oracle { |
|---|
| 42 | s/TEXT/CLOB/g; |
|---|
| 43 | s/VARCHAR/VARCHAR2/g; |
|---|
| 44 | s/TINYINT/NUMBER(3,0)/g; |
|---|
| 45 | s/SMALLINT/NUMBER(6,0)/g; |
|---|
| 46 | s/INTEGER/NUMBER(20,0)/g; |
|---|
| 47 | s/TIMESTAMP/DATE/g; |
|---|
| 48 | s/SERIAL//g; |
|---|
| 49 | if (/^CREATE TABLE (\w+) \(/) { |
|---|
| 50 | $_ .= <<SEQ; |
|---|
| 51 | |
|---|
| 52 | CREATE SEQUENCE ${1}_seq; |
|---|
| 53 | SEQ |
|---|
| 54 | } |
|---|
| 55 | s/(\s)(?:TEXT|VARCHAR)(\s*)\((\d+)\)/ |
|---|
| 56 | my($space1, $space2, $textlen) = ($1, $2, $3); |
|---|
| 57 | |
|---|
| 58 | my $type; |
|---|
| 59 | if ($textlen < 4001) { |
|---|
| 60 | $type = 'VARCHAR2'; |
|---|
| 61 | $space1 . $type . $space2 . '(' . $textlen . ')'; |
|---|
| 62 | } else { |
|---|
| 63 | $type = 'CLOB'; |
|---|
| 64 | $space1 . $type . $space2; |
|---|
| 65 | } |
|---|
| 66 | /ge; |
|---|
| 67 | s/(\s)BLOB(\s*)\((\d+)\)/ |
|---|
| 68 | my($space1, $space2, $bloblen) = ($1, $2, $3); |
|---|
| 69 | $space1 . 'BLOB' . $space2; |
|---|
| 70 | /ge; |
|---|
| 71 | # default TEXT/BLOB limit is 65535 |
|---|
| 72 | s/(\s)TEXT/$1CLOB/g; |
|---|
| 73 | s/(\s)BLOB/$1BLOB/g; |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | #sub to_postgres { |
|---|
| 77 | # # datatypes |
|---|
| 78 | # s/BLOB/BYTEA/g; |
|---|
| 79 | # s/TIMESTAMP/TIMESTAMP WITH TIME ZONE/g; |
|---|
| 80 | # s/INTEGER(.*)SERIAL/SERIAL $1/g; |
|---|
| 81 | # s/TINYINT/SMALLINT/g; |
|---|
| 82 | #} |
|---|
| 83 | |
|---|
| 84 | sub to_postgres { |
|---|
| 85 | # datatypes |
|---|
| 86 | s/BLOB/BYTEA/g; |
|---|
| 87 | s/TINYINT/SMALLINT/g; |
|---|
| 88 | s/BOOLEAN/SMALLINT/g; |
|---|
| 89 | s/MEDIUMINT/INTEGER/g; |
|---|
| 90 | s/MEDIUMTEXT/TEXT/g; |
|---|
| 91 | s/TIMESTAMP/TIMESTAMP WITH TIME ZONE/g; |
|---|
| 92 | if (/^CREATE TABLE (\w+) \(/) { |
|---|
| 93 | $_ .= <<SEQ; |
|---|
| 94 | |
|---|
| 95 | CREATE SEQUENCE ${1}_id; |
|---|
| 96 | SEQ |
|---|
| 97 | } |
|---|
| 98 | s/SERIAL//; |
|---|
| 99 | } |
|---|
| 100 | |
|---|
| 101 | # Convert to DB2 DDL format |
|---|
| 102 | |
|---|
| 103 | sub to_db2 { |
|---|
| 104 | s/TEXT/CLOB(128k)/g; |
|---|
| 105 | |
|---|
| 106 | # handle autoincrement |
|---|
| 107 | s/SERIAL(.*),/$1 GENERATED BY DEFAULT AS IDENTITY,/; |
|---|
| 108 | |
|---|
| 109 | # handle 18 char limitation.. grr |
|---|
| 110 | if (m/CREATE INDEX ([a-z0-9_]+)/) { |
|---|
| 111 | my $index_name = $1; |
|---|
| 112 | if (length($index_name) > 17) { |
|---|
| 113 | # get rid of vowels |
|---|
| 114 | $index_name =~ s/[aeiou]//g; |
|---|
| 115 | if (length($index_name) > 17) { |
|---|
| 116 | # umm, truncate it? |
|---|
| 117 | $index_name = substr($index_name, 0,17); |
|---|
| 118 | } |
|---|
| 119 | $index_name =~ s/_$//; |
|---|
| 120 | |
|---|
| 121 | s/CREATE INDEX ([a-z0-9_]+)/CREATE INDEX $index_name/; |
|---|
| 122 | } |
|---|
| 123 | } |
|---|
| 124 | # handle 30 character column name limit, barf... |
|---|
| 125 | if (m/^\s+([a-z_]+)\s+(VARCHAR|TEXT|CHAR|INTEGER)/) { |
|---|
| 126 | if (length($1) > 30) { |
|---|
| 127 | # At this time we hack this to reduce _display_ to _disp_ |
|---|
| 128 | # new sql code with long column names will have to be dealt with |
|---|
| 129 | unless (s/_display_/_disp_/) { |
|---|
| 130 | die "Found long column name $1 that we can't deal with\n"; |
|---|
| 131 | } |
|---|
| 132 | } |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | # Convert to MySQL DDL format |
|---|
| 138 | |
|---|
| 139 | sub to_mysql { |
|---|
| 140 | s/(\s)(?:TEXT|VARCHAR)(\s*)\((\d+)\)/ |
|---|
| 141 | my($space1, $space2, $textlen) = ($1, $2, $3); |
|---|
| 142 | |
|---|
| 143 | my $type; |
|---|
| 144 | if ($textlen < 256) { |
|---|
| 145 | $type = 'VARCHAR'; |
|---|
| 146 | } elsif ($textlen < 65536) { |
|---|
| 147 | $type = 'TEXT'; |
|---|
| 148 | } elsif ($textlen < 16777216) { |
|---|
| 149 | $type = 'MEDIUMTEXT'; |
|---|
| 150 | } else { |
|---|
| 151 | $type = 'LONGTEXT'; |
|---|
| 152 | } |
|---|
| 153 | $space1 . $type . $space2 . '(' . $textlen . ')'; |
|---|
| 154 | /ge; |
|---|
| 155 | s/(\s)BLOB(\s*)\((\d+)\)/ |
|---|
| 156 | my($space1, $space2, $bloblen) = ($1, $2, $3); |
|---|
| 157 | my $type; |
|---|
| 158 | if ($bloblen < 256) { |
|---|
| 159 | $type = 'TINYBLOB'; |
|---|
| 160 | } elsif ($bloblen < 65536) { |
|---|
| 161 | $type = 'MEDIUMBLOB'; |
|---|
| 162 | } elsif ($bloblen < 16777216) { |
|---|
| 163 | $type = 'LONGBLOB'; |
|---|
| 164 | } |
|---|
| 165 | $space1 . $type . $space2 . '(' . $bloblen . ')'; |
|---|
| 166 | /ge; |
|---|
| 167 | |
|---|
| 168 | # default TEXT/BLOB limit is 65535 |
|---|
| 169 | #s/(\s)TEXT/$1MEDIUMTEXT/g; |
|---|
| 170 | #s/(\s)BLOB/$1MEDIUMBLOB/g; |
|---|
| 171 | |
|---|
| 172 | # TIMESTAMP should become DATETIME, because we don't want it to |
|---|
| 173 | # auto-update (the drivers will handle that). |
|---|
| 174 | s/_created_on\s+TIMESTAMP/_created_on DATETIME/g; # except for the modified_on fields |
|---|
| 175 | s/blog_children_modified_on\s+TIMESTAMP/blog_children_modified_on DATETIME/g; # except for the modified_on fields |
|---|
| 176 | |
|---|
| 177 | # handle autoincrement |
|---|
| 178 | s/SERIAL/AUTO_INCREMENT/; |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | my $conv_func = 'to_' . $dbtarget; |
|---|
| 182 | |
|---|
| 183 | local $/ = ';'; |
|---|
| 184 | |
|---|
| 185 | while (<>) { |
|---|
| 186 | s/^\-.*$//mg; ## Strip comments on their own lines. |
|---|
| 187 | s/^\s*//; |
|---|
| 188 | s/\s*$//; |
|---|
| 189 | # can't hurt, saves keystrokes |
|---|
| 190 | s/PRIMARY KEY/PRIMARY KEY NOT NULL/; |
|---|
| 191 | no strict 'refs'; |
|---|
| 192 | &{$conv_func}; |
|---|
| 193 | use strict 'refs'; |
|---|
| 194 | print $_, "\n"; |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | # post stage.. Write out sequences if needed. |
|---|