AWK: statistics operations of multi-column CSV data - csv

With the aim of performing some statistical analysis of multi-column data, I am analyzing a large number of CSV files using the following bash + AWK routine:
#!/bin/bash
# For every directory results/<prefix>_*_<suffix>/ average the third column
# (dG) of its *_filt.csv files and write one report per prefix to
# rescore/<prefix>.csv, holding one "<suffix>: <mean>" line per input file.
home="$PWD"
# folder with the outputs
rescore="${home}/rescore"
# folder with the folders to analyse
storage="${home}/results"
csv_pattern='*_filt.csv'

mkdir -p -- "$rescore" || exit 1
cd -- "$storage" || { printf 'cannot cd to %s\n' "$storage" >&2; exit 1; }

# An unmatched glob must expand to nothing instead of reaching awk literally.
shopt -s nullglob

while IFS= read -r d; do
  # ${csv_pattern} is intentionally unquoted: it has to undergo pathname expansion.
  files=( "${d}_"*/${csv_pattern} )
  (( ${#files[@]} )) || continue

  awk -v rescore="$rescore" '
    # Store the mean of the file that was just read; a file without data
    # rows is skipped so that we never divide by zero.
    function flush_mean() {
      if (n)
        mean[suffix] = s / n
    }
    FNR == 1 {                        # header line of each input file
      flush_mean()
      prefix = suffix = FILENAME
      sub(/_.*/, "", prefix)          # "10V1_cne_lig1/x_filt.csv" -> "10V1"
      sub(/\/[^\/]+$/, "", suffix)    # drop the file name, keep the directory
      sub(/^.*_/, "", suffix)         # keep the text after the last "_" -> "lig1"
      s = n = 0
    }
    FNR > 1 {
      s += $3
      ++n
    }
    END {
      flush_mean()
      out = rescore "/" prefix ".csv"
      print prefix ":", "dG(mean)" > out
      for (i in mean)
        printf "%s: %.2f\n", i, mean[i] >> out
      close(out)
    }' "${files[@]}"
done < <(find . -maxdepth 1 -type d -name '*_*_*' | awk -F '[_/]' '!seen[$2]++ {print $2}')
Basically, the script takes the ensemble of CSV files belonging to the same prefix (defined as the naming pattern occurring at the beginning of the name of the directory containing the CSV, for example 10V1 from 10V1_cne_lig1) and calculates for it the mean value of the numbers in the third column:
# input *_filt.csv located in the folder 10V1_cne_lig1001
ID, POP, dG
1, 142, -5.6500
2, 10, -5.5000
3, 2, -4.9500
and adds one line to 10V1.csv, which is organized in a two-column format: i) the suffix of the name of the folder containing the initial CSV; ii) the mean value calculated over all numbers in the third column (dG) of the input CSV:
# this is two column format of output.csv: 10V1.csv
10V1: dG(mean)
lig1001: -5.37
In this way, for 100 CSV files the output CSV should contain 100 lines with the mean values, etc.
I need to introduce a small modification to the AWK part of my routine that would add a third column to the output CSV with the RMSD value (as a measure of the spread of the initial dG values) of the initial data (dG) that was used to calculate the MEAN value. Using AWK syntax, with a particular MEAN value the RMSD could be expressed as
# Mean of the third column (dG); the header row (NR == 1) is not a sample.
mean=$(awk -F , 'NR > 1 {sum += $3; ++n} END {if (n) printf "%.2f", sum / n}' "$csv")
# Root-mean-square deviation of dG from that mean, over the same data rows
# and with the same field separator.
rmsd=$(awk -F , -v mean="$mean" 'NR > 1 {++n; sum += ($3 - mean)^2} END {if (n) printf "%.2f", sqrt(sum / n)}' "$csv")
Here is expected output for 5 means and 5 rmsds values calculated for 5 CSV logs (the first one is corresponded to my above example!):
10V1: dG(mean): RMSD (error)
lig1001 -5.37 0.30
lig1002 -8.53 0.34
lig1003 -6.57 0.25
lig1004 -9.53 0.00 # rmsd=0 since initial csv has only 1 line: no data variance
lig1005 -8.11 0.39
How this addition could be incorporated into my main bash-AWK code with the aim to add the third RMSD column (for each of the processed CSV, thus taking each of the calculated MEAN) to the output.csv?

You can calculate both of mean and rmsd within the awk code.
Would you please try the following awk code:
awk -v rescore="$rescore" '
# Store mean and RMSD of the file that was just read, keyed by its suffix.
# A file that contributed no data rows is skipped, which also keeps the
# END rule from dividing by zero.
function store_stats(   m, var) {
    if (!n)
        return
    m = s / n                       # mean
    var = s2 / n - m * m            # variance
    if (var < 0) var = 0            # round-off can push it slightly below zero
    mean[suffix] = m
    rmsd[suffix] = sqrt(var)
}
FNR == 1 {                          # header line: a new input file starts
    store_stats()                   # results of the previous file
    prefix = suffix = FILENAME
    sub(/_.*/, "", prefix)
    sub(/\/[^\/]+$/, "", suffix)
    sub(/^.*_/, "", suffix)
    s = 0                           # sum of $3
    s2 = 0                          # sum of $3 ** 2
    n = 0                           # count of samples
}
FNR > 1 {
    s += $3
    s2 += $3 * $3
    ++n
}
END {
    store_stats()                   # results of the last file
    out = rescore "/" prefix ".csv"
    print prefix ":", "dG(mean)", "dG(rmsd)" > out
    for (i in mean)
        printf "%s: %.2f %.2f\n", i, mean[i], rmsd[i] >> out
    close(out)
}'
Here is the version to print the lowest value of dG.
awk -v rescore="$rescore" '
# Save mean, RMSD and lowest dG of the file that has just been read.
# Does nothing when that file contributed no data rows.
function save_stats() {
    if (!n)
        return
    m = s / n                           # mean
    var = s2 / n - m * m                # variance
    if (var < 0)                        # round-off may give a tiny negative value
        var = 0
    mean[suffix] = m
    rmsd[suffix] = sqrt(var)
    lowest[suffix] = min
}
FNR == 1 {                              # first line of every input file
    save_stats()
    suffix = FILENAME
    prefix = FILENAME
    sub(/_.*/, "", prefix)
    sub(/\/[^\/]+$/, "", suffix)
    sub(/^.*_/, "", suffix)
    s = s2 = n = 0                      # sum of $3, sum of $3 squared, sample count
    min = 0                             # lowest value of $3
}
FNR > 1 {
    s += $3
    s2 += $3 * $3
    ++n
    if ($3 < min) min = $3              # update the lowest value
}
END {
    save_stats()
    out = rescore "/" prefix ".csv"
    print prefix ":", "dG(mean)", "dG(rmsd)", "dG(lowest)" > out
    for (key in mean)
        printf "%s: %.2f %.2f %.2f\n", key, mean[key], rmsd[key], lowest[key] > out
}' file_*.csv
I've assumed all dG values are negative. If there is any chance the
value is greater than zero, modify the line min = 0 which initializes
the variable to considerably big value (10,000 or whatever).
Please apply your modifications regarding the filenames, if needed.
The suggestions by Ed Morton are also included although the results will be the same.

Related

kmer counts with cython implementation

I have this function implemented in Cython:
def count_kmers_cython(str string, list alphabet, int kmin, int kmax):
    """
    Count occurrence of kmers in a given string.

    Every substring whose length lies in [kmin, kmax] is counted at every
    start position where it fits completely, so kmers shorter than kmax that
    sit near the end of the string are included as well.

    string   - the sequence to scan.
    alphabet - accepted for interface compatibility; not used by the count.
    kmin     - smallest kmer length to count.
    kmax     - largest kmer length to count.

    Returns a dict mapping each kmer to its number of occurrences.
    """
    cdef dict counter = {}
    cdef int i
    cdef int j
    cdef int N = len(string)
    # A start position is useful as long as at least a kmin-mer fits there.
    for i in range(N - kmin + 1):
        # Clamp the longest length so that the slice never runs past the end.
        for j in range(kmin, min(kmax, N - i) + 1):
            kmer = string[i:i + j]
            counter[kmer] = counter.get(kmer, 0) + 1
    return counter
Can I do better with Cython? Or is there any other way to improve it?
I am new to cython, that is my first attempt.
I will use this to count kmers in DNA with alphabet restrict to 'ACGT'. The length of the general input string is the average bacterial genomes (130 kb to over 14 Mb, where each 1 kb = 1000 bp).
The size of the kmers will be 3 < kmer < 16.
I wish to know if I could go further and maybe use cython in this function to:
def compute_kmer_stats(kmer_list, counts, len_genome, max_e):
    """
    This function computes the z_score to find under/over represented kmers
    according to a cut off e-value.
    Inputs:
        kmer_list - an iterable of kmers (a list or a one-shot generator).
        counts - a dictionary-type with k-mers as keys and counts as values.
                 It is indexed with each kmer and with the kmer stripped of
                 its last, first, and first-and-last characters.
        len_genome - the total length of the sequence(s).
        max_e - cut off e-values to report under/over represented kmers.
    Outputs:
        results - a list of lists as
                  [k-mer, observed count, expected count, z-score, p-value, e-value]
                  where p-value/e-value are the under-represented ones when
                  that e-value passes the cut off, otherwise the
                  over-represented ones.
    """
    print(colored('Starting to compute the kmer statistics...\n',
                  'red',
                  attrs=['bold']))
    results = []
    # Materialise once: taking the length of a generator would exhaust it and
    # leave nothing for the loop below.
    kmers = list(kmer_list)
    # number of tests, used to convert p-value to e-value.
    n = len(kmers)
    sqrt2 = math.sqrt(2)
    for kmer in kmers:
        k = len(kmer)
        prefix, sufix, center = counts[kmer[:-1]], counts[kmer[1:]], counts[kmer[1:-1]]
        # avoid zero division error
        if center == 0:
            expected = 0
        else:
            expected = (prefix * sufix) // center
        observed = counts[kmer]
        positions = len_genome - k + 1
        # avoid zero division error when the kmer does not fit in the genome
        if positions > 0:
            variance = expected * (1 - expected / positions)
        else:
            variance = 0.0
        # A non-positive variance (expected is zero, or exceeds the number of
        # positions) has no usable standard deviation: report a neutral score
        # instead of dividing by zero or taking the root of a negative number.
        if variance <= 0:
            z_score = 0.0
        else:
            z_score = (observed - expected) / math.sqrt(variance)
        # pvalue for all kmers/palindromes under represented
        p_value_under = math.erfc(-z_score / sqrt2) / 2
        # pvalue for all kmers/palindromes over represented
        p_value_over = math.erfc(z_score / sqrt2) / 2
        # evalue for all kmers/palindromes under represented
        e_value_under = n * p_value_under
        # evalue for all kmers/palindromes over represented
        e_value_over = n * p_value_over
        if e_value_under <= max_e:
            results.append([kmer, observed, expected, z_score, p_value_under, e_value_under])
        elif e_value_over <= max_e:
            results.append([kmer, observed, expected, z_score, p_value_over, e_value_over])
    return results
OBS - Thank you CodeSurgeon by the help. I know there are other tools to count kmer efficiently but I am learning Python so I am trying to write my own functions and code.

How to calculate a probability vector and an observation count vector for a range of bins?

I want to test the hypothesis whether some 30 occurrences should fit a Poisson distribution.
#GNU Octave
X = [8 0 0 1 3 4 0 2 12 5 1 8 0 2 0 1 9 3 4 5 3 3 4 7 4 0 1 2 1 2]; #30 observations
bins = {0, 1, [2:3], [4:5], [6:20]}; #each bin can be single value or multiple values
I am trying to use Pearson's chi-square statistics here and coded the below function. I want a Poisson vector to contain corresponding Poisson probabilities for each bin and count the observations for each bin. I feel the loop is rather redundant and ugly. Can you please let me know how can I re-factor the function without the loop and make the whole calculation cleaner and more vectorized?
function result= poissonGoodnessOfFit(bins, observed)
# Pearson chi-square goodness-of-fit of OBSERVED against a Poisson
# distribution whose lambda is estimated as mean(observed).
#   bins     - cell array; each cell is a scalar value or a vector of values
#   observed - 1xn row of observations
#   result   - struct with fields "actual", "expected", "chi2" and "pvalue"
assert(iscell(bins), "bins should be a cell array");
assert(all(cellfun("ismatrix", bins)) == 1, "bin entries either scalars or matrices");
assert(ismatrix(observed) && rows(observed) == 1, "observed data should be a 1xn matrix");
lambda_head = mean(observed); #poisson lambda parameter estimate
k = length(bins); #number of bin groups
n = length(observed); #number of observations
poisson_probability = []; #poisson probability of each bin, filled in by the loop
observations = []; #observation count of each bin, filled in by the loop
for i=1:k
if isscalar(bins{1,i}) #this bin contains a single value
poisson_probability(1,i) = poisspdf(bins{1, i}, lambda_head);
observations(1, i) = histc(observed, bins{1, i});
else #this bin contains several values
inner_bins = bins{1, i}; #the values grouped into this bin
inner_bins_k = length(inner_bins); #number of values inside
inner_poisson_probability = []; #probability of each individual value inside this bin
inner_observations = []; #observation count of each individual value inside this bin
for j=1:inner_bins_k
inner_poisson_probability(1,j) = poisspdf(inner_bins(1, j), lambda_head);
inner_observations(1, j) = histc(observed, inner_bins(1, j));
endfor
poisson_probability(1, i) = sum(inner_poisson_probability, 2); #bin probability = sum of the inner probabilities
observations(1, i) = sum(inner_observations, 2); #bin count = sum of the inner observation counts
endif
endfor
expected = n .* poisson_probability; #expected observations if indeed poisson using lambda_head
chisq = sum((observations - expected).^2 ./ expected, 2); #Pearson Chi-Square statistics
#degrees of freedom are k-1-1; presumably the extra 1 accounts for the estimated lambda - TODO confirm
pvalue = 1 - chi2cdf(chisq, k-1-1);
result = struct("actual", observations, "expected", expected, "chi2", chisq, "pvalue", pvalue);
return;
endfunction
There's a couple of things worth noting in the code.
First, the 'scalar' case in your if block is actually identical to your 'range' case, since a scalar is simply a range of 1 element. So no special treatment is needed for it.
Second, you don't need to create such explicit subranges, your bin groups seem to be amenable to being used as indices into a larger result (as long as you add 1 to convert from 0-indexed to 1-indexed indices).
Therefore my approach would be to calculate the expected and observed numbers over the entire domain of interest (as inferred from your bin groups), and then use the bin groups themselves as 1-indices to obtain the desired subgroups, summing accordingly.
Here's an example code, written in the octave/matlab compatible subset of both languages:
function Result = poissonGoodnessOfFit( BinGroups, Observations )
  % POISSONGOODNESSOFFIT( BinGroups, Observations ) Pearson chi-square test of
  % Observations against a Poisson distribution whose lambda is estimated as
  % the mean of Observations.
  %
  %   BinGroups    - cell array; each cell holds the integer value(s) that
  %                  make up one bin group, e.g. {0, 1, 2:3, 4:5, 6:20}
  %   Observations - 1xn row vector of observed values
  %   Result       - struct with fields 'actual' and 'expected' (one entry per
  %                  bin group), 'chi2' and 'pvalue'

  if exist( 'OCTAVE_VERSION', 'builtin' )
    pkg load statistics;  % only needed in octave; for matlab buy statistics toolbox.
  end

  assert( iscell( BinGroups ), 'Bins should be a cell array' );
  assert( all( cellfun( @ismatrix, BinGroups ) ), 'Bin entries either scalars or matrices' );
  assert( ismatrix( Observations ) && size( Observations, 1 ) == 1, 'Observed data should be a 1xn matrix' );

  % Define helpful variables
  RangeMin       = min( cellfun( @min, BinGroups ) );
  RangeMax       = max( cellfun( @max, BinGroups ) );
  Domain         = RangeMin : RangeMax;
  LambdaEstimate = mean( Observations );
  NBinGroups     = length( BinGroups );
  NObservations  = length( Observations );

  % Get expected and observed numbers per 'bin' (i.e. discrete value) over the *entire* domain.
  Expected_Domain = NObservations * poisspdf( Domain, LambdaEstimate );
  Observed_Domain = histc( Observations, Domain );

  % Apply BinGroup values as indices. Value v lives at position v - RangeMin + 1
  % of the domain vectors, which also works when the lowest bin is not 0.
  Expected_byBinGroup = cellfun( @(c) sum( Expected_Domain(c - RangeMin + 1) ), BinGroups );
  Observed_byBinGroup = cellfun( @(c) sum( Observed_Domain(c - RangeMin + 1) ), BinGroups );

  % Perform a Chi-Square test on the Bin-wise Expected and Observed outputs
  O = Observed_byBinGroup;   E = Expected_byBinGroup ;   df = NBinGroups - 1 - 1;

  ChiSquareTestStatistic = sum( (O - E) .^ 2 ./ E );
  PValue = 1 - chi2cdf( ChiSquareTestStatistic, df );

  Result = struct( 'actual', O, 'expected', E, 'chi2', ChiSquareTestStatistic, 'pvalue', PValue );
end
Running with your example gives:
X = [8 0 0 1 3 4 0 2 12 5 1 8 0 2 0 1 9 3 4 5 3 3 4 7 4 0 1 2 1 2]; % 30 observations
bins = {0, 1, [2:3], [4:5], [6:20]}; % each bin can be single value or multiple values
Result = poissonGoodnessOfFit( bins, X )
% Result =
% scalar structure containing the fields:
% actual = 6 5 8 6 5
% expected = 1.2643 4.0037 13.0304 8.6522 3.0493
% chi2 = 21.989
% pvalue = 0.000065574
A general comment about the code; it is always preferable to write self-explainable code, rather than code that does not make sense by itself in the absence of a comment. Comments generally should only be used to explain the 'why', rather than the 'how'.

python csv print first 10 rows only

I am working with a large CSV file with a lot of rows and columns. I need only the first 5 columns but only if the value for column 1 of each row is 1. (Column 1 can only have value 0 or 1).
So far I can print out the first 5 columns but can't filter to only show when column 1 is equal to 1. My .awk file looks like:
BEGIN {FS = ","}
NR!=1 {print $1", " $2", " $3", "$4", "$5}
I have tried things like $1>1 but to no luck, the output is always every row, regardless if the first column of each row is a 0 or 1.
Modifying your awk a bit:
# Print the first five fields of the rows whose first field equals 1,
# and stop after the tenth such row.
BEGIN {
    FS = ","
    OFS = ", "
}
$1 == 1 {
    print $1, $2, $3, $4, $5
    if (++n == 10)
        exit
}

Awk: How to cut similar part of 2 fields and then get the difference of remaining part?

Let say I have 2 fields displaying epoch time in microseconds:
1318044415123456,1318044415990056
What I wanted to do is:
Cut the common part from both fields: "1318044415"
Get the difference of the remaining parts: 990056 - 123456 = 866600
Why am I doing this? Because awk uses floating point IEEE 754 but not 64 bit integers and I need to get difference of epoch time of 2 events in microseconds.
Thanks for any help!
EDIT:
Finally I found the largest number Awk could handle on Snow Leopard 10.6.8: 9007199254740992.
Try this: echo '9007199254740992' | awk -F ',' '{print $1 + 0}'
The version of Awk was 20070501 (produced by awk --version)
Here is an awk script that meets your requirements:
# Print field 2 minus field 1 for comma-separated records, after removing the
# leading digits both fields share, so that the subtraction operates on the
# short remainders rather than on the full-width values.
BEGIN {
    FS = ","
}
{
    s1 = $1
    s2 = $2
    # Dropping a shared leading digit only preserves the difference when both
    # numbers have the same number of digits, so strip only in that case
    # (otherwise e.g. "100,1000" would collapse to 0 - 0).
    if (length(s1) == length(s2)) {
        while (length(s1) > 1 && substr(s1, 1, 1) == substr(s2, 1, 1))
        {
            s1 = substr(s1, 2)
            s2 = substr(s2, 2)
        }
    }
    n1 = s1 + 0
    n2 = s2 + 0
    print n2 - n1
}

Code Golf: Connecting the dots

Locked. This question and its answers are locked because the question is off-topic but has historical significance. It is not currently accepting new answers or interactions.
You may remember these drawings from when you were a child, but now it's time to let the computer draw them (in full ascii splendour). Have fun!
Description:
The input consists of multiple lines (terminated by a newline) which describe a 'field'. There are 'numbers' scattered across this field (separated by whitespace). All lines can be considered to be the same length (you can pad spaces to the end).
the numbers always start at 1
they follow the ordering of the natural numbers: every 'next number' is incremented with 1
every number is surrounded by (at least) one whitespace on its left and right
Task:
Draw lines between these numbers in their natural order
(1 -> 2 -> 3 -> ...N) (assume N <= 99) with the following characteristics:
replace a number with a '+' character
for horizontal lines: use '-'
for vertical lines: use '|'
going left and down or right and up: /
going left and up or right and down: \
Important notes:
When drawing lines of type 4 and 5 you can assume (given the points to connect with coordinates x1, y1 and x2, y2) that distance(x1,x2) == distance(y1,y2). Or in other words (as user jball commented): "consecutive elements that are not horizontally or vertically aligned always align to the slope of the slash or backslash".
It is important to follow the order in which the dots are connected (newer lines can strike out older lines).
-- Sample input 1 --
8
7 6
10 9
5
3 4
11
12 13
1 2
-- Sample output 1 --
+
/|
/ +--+
+--------+ \
/ \
/ +
/ |
/ +--+
+ |
\ |
+------------------------+
+--------------------------+
-- Sample input 2 --
64
63
62 61
1 65
66 57 58
2 56 59 45
67 55 46
3 44
54 60 47
53 52 49 48
4 51 50 43
5 42
41
6 23
22 25 26 40
20 21 24 34
7 13 12 33
19 27 32
14 35
8 15
16
39
17 18 28 31 36
9 38
10 11 29 30 37
-- Sample output 2 -- (unicorn reference)
+
/+
//
//
//
/+--+
+ + \
| + +-\+
+ \ + \ +
/ + + \ +\
+ \ \ | +
| + + +/
| +--+ +-------+/
+ +--+ +
/ \
+ +
| +
+ + /
\ +\ +---+ +
\ +--+ + \ /+
+ +--+ / \ /+|
/ | |+ + /+ |
/ + || / // +
+ + || / // /
\ + || / // /
\ | || / +/ /
\ +---+ + +\ +
+ | | | +|
+--+ +---+ +
Winner:
Shortest solution (by code character count). Input can be read via standard input.
Commodore 64 BASIC - 313 chars
EDIT: See below for the golfed version
A little trip down the memory lane with PET graphics, POKEs and PEEKs and everything :)
The program operates directly in the screen memory, so you just go ahead, clear the screen, place your dots, and type RUN:
You have to wait a minute or so while it finds the dots and then it starts to draw. It isn't fast - you can actually see the lines being drawn, but that's the coolest part :)
Golfed version:
Commodore BASIC seems like a great language for golfing, because it doesn't require whitespace :) You can also shorten most of the commands by entering an unshifted first letter followed by a shifted second letter. For example, POKE can be typed as P[SHIFT+O], which appears as P┌ on the screen:
Perl, 222 char (211)
Perl, 384 365 276 273 253 225 222 218 211 chars (222 when contest ended). Newlines are for "readability" only and are not included in the character count.
Last edit: no longer overwriting $", and printing @S directly
# Connect-the-dots golf entry: reads the field through <>, draws into the
# array @S and prints it. Line breaks are for readability only.
$_=join'',@S=map{$n=s/$/$"x97/e;(/./g)[0..95],$/}<>;
while(/\b$n /){$S[$q=$-[0]]='+';($P,$Q)=sort{$a-$b}$q,$p||$q;
for(qw'\98 |97 /96 -1'){/\D/;$S[$P]=$&until($Q-$P)%$'||$Q<=($P+=$')}
$n++;$p=$q}s/\d/ /,print for@S
Explanation:
$_=join'',@S=map{$n=s/$/$"x97/e;(/./g)[0..95],$/}<>;
This task will be easier if all the lines are the same length (say, 97 characters).
This statement takes each line of input, replaces the end-of-line character with
96 spaces, then pushes the first 96 characters plus a newline into the array @S.
Note we are also setting $n=1, as 1 is the first number we'll look for in
the input.
The join statement creates a single string from the array @S.
It is more convenient to use the scalar variable $_ for pattern matching, and more convenient to use the array @S for making updates to the picture.
while(/\b$n /){
Search for the number $n in the variable $_. Evaluating regular expressions in Perl
has several side-effects. One is to set the special variable $-[0] with the position of the start of the matched pattern within the matched string. This gives us the position of the number $n in the string $_ and also the array @S.
Of course, the loop will end when $n is high enough that we can't find it in the input.
$S[$q=$-[0]]='+';
Let $q be the position of the number $n in the string $_ and the array @S,
and assign the character '+' at that position.
$P=($p||=$q)+$q-($Q=$q>$p?$q:$p)
($P,$Q)=sort{$a-$b}$p||$q,$q;
The first time through the loop, set $p to $q. After the
first time, $p will hold the previous value of $q (which
will refer to the position in the input of the previous number).
Assign $P and $Q such that $P=min($p,$q),
$Q=max($p,$q)
for(qw'\98 |97 /96 -1'){
By construction, consecutive numbers are either
connected by a vertical line. Since the input is constructed
to have 97 characters on each line, this case means that
$p-$q is divisible by 97.
"aligned to the slope of a backslash", which would make
$p-$q divisible by 98
"aligned to the slope of a forward slash", which would make
$p-$q divisible by 96
on the same horizontal line
The elements of this list encode the possible number of positions
between line segments, and the character to encode that segment.
/\D/;
Another trivial regex evaluation. As a side-effect, it sets the
special variable $& (the MATCH variable) to the line segment
character (\ | / or -) and $' (the POSTMATCH variable) to
the number (98 97 96 or 1) encoded in the list element.
$S[$P]=$&until($Q-$P)%$'||$Q<=($P+=$')
This statement draws the line segment between two numbers.
If $Q-$P is divisible by $', then keep incrementing $P by $'
and assigning the character $& to $S[$P] until $P reaches $Q.
More concretely, for example if $Q-$P is divisible by 97, then
increment $P by 97 and set $S[$P]='|'. Repeat until $P>=$Q.
$n++;$p=$q
Prepare for the next iteration of the loop. Increment $n to the
next number to search for in the input, and let $p hold the
position of the previous number.
s/\d/ /,print for@S
Output the array, converting any leftover digits (from double
digit identifiers in the input where we only overwrote the first
digit with a '+') to spaces as we go.
MS-DOS Batch (yes, you read right!)
I often hear (or read) people say batch isn't very powerful and you can't do much with them, well to them I say, behold, the power of BATCH!
The actual script (script.bat):
REM Connect-the-dots drawer. %1 is the input file. The picture is built in the
REM variables final_0, final_1, ... (one per line) and echoed at :MainEnd.
REM Relies on delayed expansion (!var!) being enabled by the caller.
set file=%~1
REM Locate number 1; :FindNextNum leaves its position in lineNum / linePos.
call :FindNextNum 1
REM Count the lines of the input file.
for /F "tokens=2 delims=:" %%i IN ('find /c /V "" "%file%"') DO set /a totalLines=%%i
REM Find the largest per-line character count reported by :CountChars.
set maxLen=0
for /F "delims=" %%i IN (%file%) DO (
call :CountChars "%%i"
if /i !charCount! gtr !maxLen! set maxLen=!charCount!
)
REM Initialise every output line with spaces.
for /L %%i IN (0,1,%totalLines%) DO set "final_%%i=" & for /L %%j IN (0,1,%maxLen%) DO set "final_%%i=!final_%%i! "
REM Main loop: remember the current number's position, look up the next
REM number, and draw a line between the two; stop when no next number exists.
:MainLoop
set currLineNum=%lineNum%
set currCol=%linePos%
set currNum=%nextNum%
set /a targetNum=%currNum%+1
call :FindNextNum %targetNum%
if "%nextNum%"=="" goto MainEnd
REM echo %currNum% -^> %nextNum%
REM Always pass the point on the smaller line number first.
if /I %currLineNum% lss %lineNum% (
call :DrawLine %currCol% %currLineNum% %linePos% %lineNum%
) else (
call :DrawLine %linePos% %lineNum% %currCol% %currLineNum%
)
goto MainLoop
:MainEnd
for /L %%i IN (0,1,%totalLines%) DO echo.!final_%%i!
goto:eof
REM :DrawLine col1 line1 col2 line2
REM Same line -> :DrawHoriz. Otherwise walk the lines from line1 to line2,
REM moving the column by inc each step; char stays empty for equal columns,
REM which :DrawChar turns into a vertical bar.
:DrawLine
if /I %2 equ %4 goto:DrawHoriz
set "char=" & set "pos=%1" & set "inc=0"
if /I %1 LSS %3 set "char=\" & set "pos=%1" & set "inc=1"
if /I %1 GTR %3 set "char=/" & set "pos=%1" & set "inc=-1"
for /L %%i IN (%2,1,%4) DO call :DrawChar %%i !pos! %char% & set /a "pos+=%inc%"
goto:DrawEnds
REM Horizontal segment: overwrite the columns between the two ends with "-"
REM and re-append the saved remainder of the line.
:DrawHoriz
set "start=%1+1" & set "end=%3"
if /I %start% gtr %end% set "start=%3+1" & set "end=%1"
set /a lineEnd=%end%+1
set lineEnd=!final_%2:~%lineEnd%!
for /L %%i IN (%start%,1,%end%) DO set final_%2=!final_%2:~0,%%i!-
set final_%2=!final_%2!!lineEnd!
REM Mark both end points with "+".
:DrawEnds
call :DrawChar %2 %1 +
call :DrawChar %4 %3 +
goto:eof
REM :DrawChar line col [char]
REM Replace one character of final_<line> at column col; when no character
REM is passed a "|" is written.
:DrawChar
set /a skip2=%2+1
if "%3"=="" (
set final_%1=!final_%1:~0,%2!^|!final_%1:~%skip2%!
) else (
set final_%1=!final_%1:~0,%2!%3!final_%1:~%skip2%!
)
goto:eof
REM :CountChars "text"
REM Sets charCount by dropping the first character of the text repeatedly
REM while something remains after it.
:CountChars
set charCount=0
set val=%~1
:CountChars_loop
if not "%val:~1%"=="" (
set /a charCount+=1
set val=!val:~1!
goto CountChars_loop
)
goto:eof
REM :FindNextNum number
REM Scan the numbered lines of the input file; returns as soon as nextNum
REM equals the requested number, with lineNum set to the zero-based line.
:FindNextNum
for /F "delims=" %%i IN ('type "%file%" ^| find /V /N ""') DO (
for /F "tokens=1,2 delims=[]" %%j IN ("%%i") DO (
set /a lineNum=%%j-1
call :FindNext_internal "%%k" %1
if /I !nextNum! equ %1 goto :eof
)
)
goto:eof
REM :FindNext_internal "line" number
REM Step through the numbers of one line, accumulating the column in linePos,
REM until the requested number is found or the line has no more numbers.
:FindNext_internal
set currLine=%~1
set linePos=0
:FindNext_internal_loop
call :NextNumInLine "%currLine%"
set /a linePos+=%spaceInterval%
if "%nextNum%"=="" goto :EOF
if /I %nextNum% equ %2 goto :EOF
set /a spaceInterval+=1
set /a linePos+=1
if /I %nextNum% GTR 9 set /a "spaceInterval+=1" & set /a linePos+=1
set currLine=!currLine:~%spaceInterval%!
goto FindNext_internal_loop
REM :NextNumInLine "text"
REM Sets nextNum to the first token of the text (empty when there is none)
REM and spaceInterval to the number of leading spaces.
:NextNumInLine
set nextNum=
for /F %%i IN (%1) DO set /a nextNum=%%i
if "%nextNum%"=="" goto :eof
set /a spaceInterval=0
set val=%~1
:NextNumInLine_loop
if "%val:~0,1%"==" " (
set /a spaceInterval+=1
set val=!val:~1!
goto NextNumInLine_loop
)
goto :eof
And this is how you call it
echo off
setlocal ENABLEDELAYEDEXPANSION
call script.bat input.txt
where "input.txt" is a file that contains the input for the "program".
P.S. This isn't actually optimized for line length yet, I've already spent a couple of hours getting to this point and now I need to sleep... I'll see if I can improve it tomorrow (currently 'script.bat' sits at 2755 bytes)
Rebmu: 218 chars
Ma L{-|\/}Qb|[sg?SBaB]Da|[feSm[TfiSrj[spAsp]iT[++Tbr]]t]Xa|[i?A]Ya|[i?FImHDa]Ca|[skPCmSCaBKfsA]wh[Jd++N][roG[xJyJ]]Bf+GwhB[JcB Ff+GiF[KcF HqXkXj VqYkYju[chCbPClEZv1[ezH2[eeHv3 4]]e?A+bRE[hV]f]]chJeFIlSCj{+}{+ }Jk Bf]wM
I'm getting pretty good at reading and editing it natively in its pig-latin form. (Though I do use line breaks!!) :)
But here's how the dialect is transformed by the interpreter when the case-insensitive "mushing" trick is boiled away, and one gets accustomed to it. I'll add some comments. (Tips: fi is find, fe is foreach, sp is a space character, i? is index, hd is head, ch is change, sk is skip, pc is pick, bk is break, i is if, e is either, ee is either equal, ad nauseum)
; copy program argument into variable (m)atrix
m: a
; string containing the (l)etters used for walls
l: {-|\/}
; q is a "b|function" (function that takes two parameters, a and b)
; it gives you the sign of subtracting b from a (+1, -1, or 0)
q: b| [sg? sb a b]
; d finds you the iterator position of the first digit of a two digit
; number in the matrix
d: a| [fe s m [t: fi s rj [sp a sp] i t [++ t br]] t]
; given an iterator position, this tells you the x coordinate of the cell
x: a| [i? a]
; given an iterator position, this tells you the y coordinate of the cell
y: a| [i? fi m hd a]
; pass in a coordinate pair to c and it will give you the iterator position
; of that cell
c: a| [sk pc m sc a bk fr a]
; n defaults to 1 in Rebmu. we loop through all the numbers up front and
; gather their coordinate pairs into a list called g
wh [j: d ++ n] [ro g [x j y j]]
; b is the (b)eginning coordinate pair for our stroke. f+ returns the
; element at G's current position and advances G (f+ = "first+")
; advance g's iteration position
b: f+ g
wh b [
; j is the iterator position of the beginning stroke
j: c b
; f is the (f)inishing coordinate pair for our stroke
f: f+ g
; if there is a finishing pair, we need to draw a line
i f [
; k is the iterator position of the end of the stroke
k: c f
; the (h)orizontal and (v)ertical offsets we'll step by (-1,0,1)
h: q x k x j
v: q y k y j
u [
; change the character at iterator location for b (now our
; current location) based on an index into the letters list
; that we figure out based on whether v is zero, h is zero,
; v equals h, or v doesn't equal h.
ch c b pc l ez v 1 [ez h 2 [ee h v 3 4]]
; if we update the coordinate pair by the offset and it
; equals finish, then we're done with the stroke
e? a+ b re [h v] f
]
]
; whether we overwrite the number with a + or a plus and space
; depends on whether we detect one of our wall "letters" already
; one step to the right of the iterator position
ch j e fi l sc j {+} {+ }
; update from finish pair to be new begin pair for next loop iteration
j: k
b: f
]
; write out m
w m
Both the language and sample are new and in an experimental stage. For instance, ad couldn't be used to add together vectors and matrices before I changed it to help with this sample. But I think that's just the sort of thing that a language designed specifically for code golf has to have anyway. It's a subtle line between "language" and "library".
Latest source with comments available on GitHub
Haskell, 424 chars
Current char count: 424 430 451 466 511 515 516 518 525 532 541 545 550 556 569 571 577 582 586 592.
-- Connect-the-dots golf entry. main feeds standard input through the solver
-- with interact; the input is first split into runs by groupBy and folded
-- with t, and r turns the folded state into the output string.
import List
-- pair every element of x with the character c
x%c=[(i,c)|i<-x]
l k p q|p>q=l k q p|True=head[[p,p+j..q]%c|m<-zip[k-1,k,k+1,1]"/|\\-",let (j,c)=m,mod(q-p)j==0]
w=map snd
q(k,m,x)z=w$sort$nubBy((==)&fst)$x%'+'++(concat$zipWith(l k)x$tail x)++z%'\n'++[1..m]%' '
r(z,m,x)=q(last z,m-1,w$sort x)z
u[(m,_)]n x=(-m::Int,n):x;u _ _ x=x
t(z,n,x)s|s=="\n"=(n:z,n+1,x)|True=(z,n+length s,u(reads s)n x)
-- y&x applies x to both arguments before combining them with y
y&x=(.x).y.x
main=interact$r.foldl t([],1,[]).groupBy((&&)&(>' '))
This version takes a lot of inspiration from the original Haskell entry below, but makes some significant changes. Most importantly, it represents image locations with a single index, not a pair of coordinates.
There are some changes:
The input must now have all lines padded to the same length (allowed by the rules.)
No longer needs either language extension
Original version:
(Needs -XTupleSections, and maybe -XNoMonomorphismRestriction)
-- Original connect-the-dots golf entry. main runs r over standard input via
-- interact. The code uses tuple sections such as (,'-').
import List
-- one-letter aliases for common functions
b=length
f=map
g=reverse
a(x,y)" "=(x,y+1)
a(x,y)z=([y,read z]:x,y+b z)
-- the integers strictly between x and y
x%y=[min x y+1..max x y-1]
j([x,y],[w,z])|y==z=f(,'-')$f(y,)$x%w|x==w=f(,'|')$f(,x)$y%z|(y<z)==(x<w)=f(,'\\')$zip(y%z)$x%w|True=f(,'/')$zip(y%z)$g$x%w
k 0='\n'
k _=' '
-- y&x applies x to both arguments before combining them with y
y&x=(.x).y.x
y?x=f y.sort.x.concat
r z=snd?(nubBy((==)&fst).g)$[((y,x),k x)|x<-[0..maximum$f b d],y<-[1..b d]]:[((y,x),'+')|[x,y]<-e]:(f j$zip e$tail e)where d=f(groupBy$(&&)&(>' '))$lines z;e=tail?f g$zipWith(f.(:))[1..]$f(fst.foldl a([],1))d
main=interact r
Explanation:
(1) d=...: Splits the input into spaces and numbers, e.g.
z = " 6 5\n\n1 2\n\n 4 3\n\n 7"
=> d = [[" ","6"," "," ","5"],[],["1"," "," "," "," "," "," "," ","2"],[],[" "," "," "," ","4"," "," "," ","3"],[],[" ","7"]]
(2) e=...: Converts d into a list of (y, x) coordinates for each number.
e = [[1,3],[9,3],[9,5],[5,5],[5,1],[2,1],[2,7]]
--- // 1 2 3 4 5 6 7
(3)
[((y,x),k x)|...] is an empty board. (k returns a space or a \n depending on the x-coordinate.)
[((y,x),'+'))|...] are the plus signs at the numbers.
(f j$zip e$tail e) are the lines connecting the numbers. (j maps a pair of coordinates into a list of (coordinate, character) which represents a line.)
These 3 components are concatenated and filtered to form the actual output. Note that the order is important, so that nubBy(...).g can only keep the last character in the same location.
AWK - 296 317 321 324 334 340
Not a prize winner (yet), but I am pleased with the effort (line breaks for display). This new version uses VT-100 escape sequences. The '^[' is just one character, Escape!!! Cut and paste will not work with this version, since the sequence "^[" has to be replaced with the real ESC character. To make it forum friendly, ESC could be specified as "\0x1b", but it takes too much space...
# Connect-the-dots golf entry, terminal version: the main rule records the
# column (x) and row (y) of every number; END prints each character together
# with a cursor-positioning sequence. "^[" stands for the ESC character.
BEGIN{FS="[ ]"}{for(j=i=0;i<NF;j+=length(g)){if(g=$++i){x[g]=k=i+j;y[g]=NR;
m=m>k?m:k}}}END{printf"^[[2J[%d;%dH+",Y=y[i=1],X=x[1];while(a=x[++i])
{a-=X;b=y[i]-Y;t=a?b?a*b>0?92:47:45:124;A=a?a>0?1:-1:0;B=b?b>0?1:-1:0;
for(r=a?a*A:b*B;--r;){printf"^[[%d;%dH%c",Y+=B,X+=A,t}
printf"^[[%d;%dH+",Y+=B,X+=A}}
The older standard version
# Connect-the-dots golf entry: the main rule records the column (x) and row
# (y) of every number; END fills the matrix q with character codes along each
# segment and then prints q, emitting a space (32) for unset cells.
BEGIN{FS="[ ]"}{for(j=i=0;i<NF;j+=length(g)){if(g=$++i){x[g]=k=i+j;y[g]=NR;
m=m>k?m:k}}}END{q[X=x[1],Y=y[i=1]]=43;while(a=x[++i]){a-=X;b=y[i]-Y;
t=a?b?a*b>0?92:47:45:124;A=a?a>0?1:-1:0;B=b?b>0?1:-1:0;for(r=a?a*A:b*B;--r;
q[X+=A,Y+=B]=t);q[X+=A,Y+=B]=43}for(j=0;++j<NR;){for(i=0;i<m;){t=q[i++,j];
printf"%c",t?t:32}print}}
Now a little explanation
# This will break the input in fields separated by exactly 1 space,
# i.e. the fields will be null or a number.
BEGIN{FS="[ ]"}
# For each line we loop over all fields, if the field is not null
# it is a number, hence store it.
# Also account for the fact the numbers use space.
# Also, find the maximum width of the line.
{
for(j=i=0;i<NF;j+=length(g)){
if(g=$++i){
k=j+i;x[g]=k;y[g]=NR;m=m>k?m:k
}
}
}
# Once we have all the data, let start cooking.
END{
# First, create a matrix with the drawing.
# first point is a +
q[X=x[1],Y=y[i=1]]=43;
# loop over all points
while(a=x[++i]){
# Check next point and select character
# If a == 0 -> -
# If b == 0 -> |
# If a and b have same sign -> \ else /
a-=X;b=y[i]-Y;t=a?b?a*b>0?92:47:45:124;
# there is no sgn() function
A=a?a>0?1:-1:0;B=b?b>0?1:-1:0;
# Draw the line between the points
for(k=0;++k<(a?a*A:b*B);){
q[X+=A,Y+=B]=t
}
# store + and move to next point
q[X+=A,Y+=B]=43
}
# Now output all lines. If value in point x,y is 0, emit space
for(j=0;++j<NR;){
for(i=0;i<m;){
t=q[i++,j];printf("%c",t?t:32)
}
print
}
}
C, 386
402 386 character in C. Newlines after the first are only for readability.
#include <stdio.h>
/* Golfed connect-the-dots.
 * Pass 1 (the while loop) reads stdin one character at a time. A space
 * advances the column counter c; a newline updates the maximum width w,
 * increments the line count h and resets c to 1; anything else is pushed
 * back and re-read as an integer b, whose column and row are stored in
 * x[b] and y[b] (c gets one extra step when b > 9).
 * Pass 2 emits a newline followed by w characters for each of the h rows.
 * For every cell the inner loop walks the dots in order (stopping at the
 * first dot whose x[] entry is 0) and selects the character to print. */
int x[101],y[101],c=1,r,w,h,b,i,j,k,m,n;
int main(){
while((b=getchar())-EOF)
b-' '?b-'\n'?ungetc(b,stdin),scanf("%d",&b),x[b]=c++,y[b]=h,c+=b>9:(w=c>w?c:w,++h,c=1):++c;
for(r=0;r<h&&putchar('\n');++r)
for(c=0;c<w;++c){
/* m,n = offset of dot i from this cell; j,k = offset of the previous dot */
for(b=' ',i=2,m=x[1]-c,n=y[1]-r;j=m,k=n,m=x[i]-c,n=y[i]-r,x[i++];)
b=j|k&&m|n?j*m>0|k|n?k*n<0?(j-k|m-n?j+k|m+n?j|m?b:'|':'/':'\\'):b:'-':'+';
putchar(b);
}
}
Intel Assembler
Assembled size: 506 bytes
Source: 2252 bytes (hey, it's not a trivial problem this one)
To Assemble: Use A86
To Run: Tested with a WinXP DOS box. Invocation jtd.com < input > output
; Connect-the-dots for DOS, A86 syntax.
; The code writes characters through ES (set to 0b800h below) while it reads
; the input, keeps per-number records in the area starting at a32, and ends
; by printing a '$'-terminated buffer with DOS function 09h.
; Several instructions are patched at run time: [a21-1] and [a22-1] address
; the last byte of the instruction preceding a21 / a22 (the constant 43), and
; [a22-3] / [a25-3] address the 100h word inside the "mov b[bp+100h],43"
; instructions preceding a22 / a25.
; int 10h with AX=3 (AH=0: set video mode, AL=3)
mov ax,3
int 10h
; ES = 0b800h, used for all es:[di] stores below
mov ax,0b800h
mov es,ax
; int 21h AH=0bh: query standard-input status into AL
mov ah,0bh
int 21h
mov bx,255
cmp al,bl
mov dh,bh
; SI holds the address of the character reader used by "call si":
; a12 when AL was 255 above, a14 otherwise
mov si,offset a12
push offset a24
je a1
mov si,offset a14
; BL is advanced once per character, BH once per line (see a3)
a1: inc bl
a2: mov dl,255
call si
cmp al,10
jb a4
; the readers subtract 48 from the character, so a line feed (10)
; arrives here as 10-48
a3: cmp al,10-48
jne a1
inc bh
mov bl,dh
jmp a2
; first digit in AL; read one more character and, if it is a digit too,
; combine both into one value with aad
a4: mov dl,al
call si
cmp al,10
jae a5
mov ah,dl
aad
mov dl,al
; record for this number: 4 bytes at a32 + 4*number (BX, then a screen offset)
a5: mov di,dx
mov ch,al
shl di,2
mov [di+a32],bx
; a30 / a31 keep the largest BL / BH stored so far
cmp bl,[offset a30]
jb a6
mov [offset a30],bl
a6: cmp bh,[offset a31]
jb a7
mov [offset a31],bh
a7: push offset a19
; AX = (80*BH + BL) * 2
mov al,80
mul bh
add al,bl
adc ah,0
add ax,ax
lea di,[di+2+a32]
mov [di],ax
add di,2
cmp di,[a22-3]
jbe a8
mov [a22-3],di
mov [a25-3],di
; write the number's digit(s) as ASCII at es:[di]
a8: mov di,ax
mov al,dl
aam
cmp ah,0
je a10
a9: add ah,48
mov es:[di],ah
add di,2
a10:add al,48
mov es:[di],al
mov al,ch
inc bl
jmp a3
a11:jmp si
; reader a12: int 21h AH=0bh status check, then AH=8 to fetch a character
a12:mov ah,0bh
int 21h
cmp al,255
jne a15
mov ah,8
int 21h
; character 13 is skipped by calling the reader again; others return char-48
a13:cmp al,13
je a11
sub al,48
ret
; reader a14: int 21h AH=1 to fetch a character; on character 26 the
; reader is switched to a15
a14:mov ah,1
int 21h
cmp al,26
jne a13
mov si,offset a15
ret
a15:cmp dl,255
je a16
mov al,32
ret
; input finished: load the record that follows a32+4, mark the cell, pause
a16:mov si,offset a32 + 4
lodsw
mov cx,ax
mov dx,ax
lodsw
mov di,ax
mov b es:[di],1
mov bp,0f000h
call a26
add sp,6
; fill the output buffer: ([a31]+1) rows of [a30] spaces, each followed by
; the bytes 0dh,0ah, then a '$' terminator
mov bx,[a22-3]
mov ax,[offset a31]
inc ax
a17:mov bp,[offset a30]
a18:mov b[bx],32
inc bx
dec bp
jnz a18
mov w[bx],0a0dh
add bx,2
dec ax
jnz a17
mov b[bx],'$'
add w[a30],2
; a19: compare the next record with the current one; the flag bytes captured
; by lahf after each cmp are combined into BX, an index (times 8) into a29
a19:lodsw
xchg ax,dx
cmp ah,dh
lahf
mov bl,ah
cmp al,dl
lahf
shr bl,6
shr ah,4
and ah,12
or bl,ah
mov bh,0
shl bx,3
; a20..a22: store a character at es:[di] and at [bp+100h] (BP = [a30]*CH+CL),
; then step DI, CL and CH by the table entry selected by BX
a20:mov b es:[di],43
a21:mov al,b[a30]
mul ch
add al,cl
adc ah,0
mov bp,ax
mov b[bp+100h],43
a22:add di,[bx + a29]
add cl,[bx + a29 + 4]
add ch,[bx + a29 + 6]
mov b es:[di],1
; patch the constant of the two store instructions with the table's character
mov al,[bx + a29 + 2]
mov [a21-1],al
mov [a22-1],al
mov bp,01000h
call a26
cmp di,[si]
jne a20
mov al,es:[di+2]
sub al,48
cmp al,10
jae a23
mov b es:[di+2],0
; restore the patched constants to 43 ('+')
a23:mov b[a21-1],43
mov b[a22-1],43
mov b es:[di],43
lodsw
ret
a24:mov al,b[a30]
mul ch
add al,cl
adc ah,0
mov bp,ax
mov b[bp+100h],43
; int 21h AH=9: print the '$'-terminated buffer whose address is in [a22-3]
a25:mov dx,[a22-3]
mov ah,9
int 21h
ret
; a26: busy-wait; BP = number of 0ffffh-iteration inner loops.
; pusha/popa leave all registers unchanged for the caller.
a26:pusha
a27:mov cx,0ffffh
a28:loop a28
dec bp
jnz a27
popa
ret
; Table read at a22 with BX = 8*index, four words per entry:
; DI delta, character code, CL delta, CH delta.
; The words at a30 and a31 lie inside this run of data and are also used as
; variables (see a5..a7).
a29:dw -162,92,-1,-1,-2,45,-1,0,158,47,-1,1,0,0,0,0,-160,124,0,-1
a30:dw 0
a31:dw 0,0,0,160,124,0,1,0,0,0,0,-158,47,1,-1,2,45,1,0,162,92,1,1
a32:
Interesting features: self modifying code, animated output (the second example works, but is too big to display), abuse of 'ret' to implement a loop counter, interesting way of determining line/movement direction.
F#, 725 chars
open System
let mutable h,s,l=0,Set.empty,Console.ReadLine() // h: rows read, s: set of (number,row,col), l: current line
while l<>null do // ReadLine returns null at end of input
    l.Split([|' '|],StringSplitOptions.RemoveEmptyEntries)
    |>Seq.iter(fun t->s<-s.Add(int t,h,(" "+l+" ").IndexOf(" "+t+" "))) // padding makes " t " match only the whole token; the index equals t's column in l
    h<-h+1;l<-Console.ReadLine()
let w=Seq.map(fun(k,h,x)->x)s|>Seq.max // largest column holding a number
let o=Array2D.create h (w+1)' ' // h x (w+1) canvas of spaces
Seq.sort s|>Seq.pairwise|>Seq.iter(fun((_,b,a),(_,y,x))-> // consecutive dots, ordered by number
    let a,b,x,y=if b>y then x,y,a,b else a,b,x,y // make (a,b) the upper point
    o.[b,a]<-'+'
    o.[y,x]<-'+'
    if b=y then for x in(min a x)+1..(max a x)-1 do o.[y,x]<-'-' // same row
    elif a=x then for h in b+1..y-1 do o.[h,x]<-'|' // same column
    elif a<x then for i in 1..y-b-1 do o.[b+i,a+i]<-'\\' // down-right diagonal
    else for i in 1..y-b-1 do o.[b+i,a-i]<-'/') // down-left diagonal
for h in 0..h-1 do
    for x in 0..w do printf"%c"o.[h,x]
    printfn""
Legend:
h = height
s = set
l = curLine
w = (one less than) width
o = output array of chars
Lines 1-6: I keep a set of (number, lineNum, xCoord) tuples; as I read in each line of input I find all the numbers and add them to the set.
Line 7-8: Then I create an array of output chars, initialized to all spaces.
Line 9: Sort the set (by 'number'), then take each adjacent pair and ...
Lines 10-16: ... sort so (a,b) is the 'highest' of the two points and (x,y) is the other. Put the '+' signs, and then if horizontal, draw that, else if vertical, draw that, else draw the correct diagonal. If the input is not 'valid', then who knows what happens (this code was littered with 'asserts' before I golf-ized it).
Lines 17-19: Print the result
Powershell, 328 304 characters
# Golfed connect-the-dots. $k maps each number to @{x;y}; $s holds one output
# string per input line. "@{}" is the hashtable literal and "@()" the array
# sub-expression; written with "#" instead they become comments.
$i=$l=0;$k=@{}
$s=@($input|%{[regex]::matches($_,"\d+")|%{$k[1*$_.Value]=@{y=$l
x=$_.Index}};$l++;""})
while($a=$k[++$i]){
if($i-eq1){$x=$a.x;$y=$a.y}
do{$d=$a.x.CompareTo($x);$e=$a.y.CompareTo($y)
$s[$y]=$s[($y+=$e)].PadRight($x+1).Remove($x,1).Insert(($x+=$d),
"\-/|+|/-\"[4+$d*3+$e])}while($d-or$e)}$s
and here's a pretty-printed version with comments:
# Usage: gc testfile.txt | dots.ps1
#
# Connects the numbered dots found in the piped text: every run of digits is
# a dot, dots are joined in numeric order, and the resulting picture is
# written to the output as one string per input line.
$l=$i=0 # line, dot index (used below)
$k=@{} # hashtable that maps dot index to coordinates
# Apply regular expression to each line of the input
$s=@( $input | foreach{
    [regex]::matches($_,"\d+") | foreach{
        # Store each match in the hashtable
        # (1*... turns the matched text into a number so that $k[++$i] finds it)
        $k[ 1*$_.Value ] = @{ y = $l; x = $_.Index }
    }
    $l++; # Next line
    "" # For each line return an empty string.
    # The strings are added to the array $s which
    # is used to produce the final output
}
)
# Connect the dots!
while( $a = $k[ ++$i ] )
{
    if( $i -eq 1 ) # First dot?
    {
        # Current position is ($x, $y)
        $x = $a.x;
        $y = $a.y
    }
    do
    {
        $d = $a.x.CompareTo( $x ) # sign( $a.x - $x )
        $e = $a.y.CompareTo( $y ) # sign( $a.y - $y )
        # Index 4 (both signs zero) is '+', i.e. the dot itself
        $c = '\-/|+|/-\'[ 4 + $d * 3 + $e ] # character '
        # Move
        $x += $d
        $y += $e
        # "Replace" the character at the current position
        # PadRight() ensures the string is long enough
        $s[ $y ]=$s[ $y ].PadRight( $x+1 ).Remove( $x, 1 ).Insert( $x, $c )
    } while( $d -or $e ) # Until the next dot is reached
}
# Print the resulting string array
$s
Python - 381
# Connect-the-dots (Python 2: uses raw_input and list-returning zip/map).
# Reads lines from stdin up to the first empty line, joins the numbered dots
# in numeric order and prints the picture.
import re
# b: input lines
b=list(iter(raw_input,''))
# c: (row, match) for every run of digits in every line
c=sum((zip([i]*999,re.finditer('\\d+',x))for i,x in enumerate(b)),[])
# d: (number, row, column) sorted by number
d=sorted((int(m.group()),i,m.start())for i,m in c)
# e: canvas of spaces, as wide as the longest input line
e=[[' ']*max(map(len,b))for x in b]
# Pair every dot with its successor; the last dot is paired with itself so
# that it still receives its '+'.
for(t,u,v),(x,y,z)in zip(d,d[1:]+d[-1:]):
 e[u][v]='+'
 # i, j: row and column step (-1, 0 or 1) towards the next dot
 while u!=y or v!=z:i,j=(u<y)-(u>y),(v<z)-(v>z);u+=i;v+=j;e[u][v]=['|','/\\-'[(i==j)+2*(i==0)]][j!=0]
# Parenthesized form prints the same string under Python 2.
print('\n'.join(map(''.join,e)))
C#, 422 chars
758 754 641 627 584 546 532 486 457 454 443 440 422 chars (next time maybe I won't submit so soon.)
// Golfed connect-the-dots: reads all of standard input, then repeatedly scans
// it for the next dot number O and draws from the previous dot to it with
// Console.SetCursorPosition / Console.Write.
using A=System.Console;class B{static int C,o,d,e,G,O=1,f,F,u,n;static
void Main(){var s=A.In.ReadToEnd();A.Clear();while(++u<s.Length){f++;if
(s[u]<32){u++;F++;f= 0;}if(s[u]>32){if(int.Parse(s[u]+""+s[++u])==O){o=
e>f?1:f>e?-1:0;C=d>F?1:F>d?-1:0 ;G=e+o;n=d+C;if(O++>1)while(n!=F||G!=f)
{A.SetCursorPosition(G-=o,n-=C);A.Write( "+/-|\\"[n==d&&G==e?0:n==F&&G
==f?0:C+o==0?1:C==0?2:o==0?3:4]);}e=f;d=F;F=0;f=u=-1 ;}f++;}}A.Read();}}
Usage: run, paste (or type) the input, ensure the last line is terminated, press CTRL-Z or F6, press Enter.
Formatted but still basically unintelligible version:
using A = System.Console;
// Connect-the-dots on the console.
// The whole input is read into one string. For each dot number O = 1, 2, ...
// the string is rescanned from the start until that number is found; a line
// is then drawn on the console from the previous dot (e, d) to the new one
// (f, F) and the scan restarts for the next number.
class B
{
// code golf fun!
// C, o : vertical / horizontal step used while drawing
// d, e : row / column of the previous dot
// G, n : column / row of the cursor while drawing
static int C, o, d, e, G, O = 1, f, F, u, n;
static void Main()
{
// read the input into a string char by char until EOF
var s = A.In.ReadToEnd();
A.Clear(); // clear console, ready to draw picture
// O is the "dot" number we're looking for
// f is current column
// F is current row
// loop over the field looking for numbers sequentially
// until no more are found
// NOTE(review): u starts at 0 and is pre-incremented, so s[0] is not
// examined on the first pass -- confirm the input never starts with a dot.
while (++u < s.Length)
{
f++;
// any char <32 is expected to be a CR/LF
// increment the current row and reset the current column
if (s[u] < 32)
{
u++; // skip the other half of the CR/LF pair
F++; // next row
f = 0; // column reset
}
// any char >32 is expected to be a number
// NOTE(review): u may have been advanced just above without a bounds
// check -- confirm the input cannot end right after a line break.
if (s[u] > 32)
{
// parse the current + next char and see if it's
// the number we want
if (int.Parse(s[u] + "" + s[++u]) == O)
{
// set up coordinates, compare X1 with X2
// and Y1 with Y2 to figure out line direction
// horizontal direction (same as o=e.CompareTo(f))
o = e > f ? 1 : f > e ? - 1 : 0;
// vertical direction (same as C=d.CompareTo(F))
C = d > F ? 1 : F > d ? - 1 : 0;
// initial offsets compensate for off-by-one
G = e + o;
n = d + C;
// draw the line (except for the very first dot)
if (O++ > 1)
while (n != F || G != f)
{
// update coords and write desired char
A.SetCursorPosition(G -= o, n -= C);
// this lovely line decides which char to
// print, and prints it
A.Write(
"+/-|\\"[n == d && G == e ? 0 : n == F && G
== f ? 0 : C + o == 0 ? 1 : C == 0 ? 2 : o
== 0 ? 3 : 4]);
}
// remember end point of this line, to use as start point
// of next line
e = f;
d = F;
// reset current row (F), column (f), field position (u)
F = 0;
f = u = -1;
}
// bump current column because we parse 2 chars when we
// find a dot
f++;
}
}
A.Read(); // prevent command prompt from overwriting picture
}
}
Here goes!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Signum: yields -1 for negative x, +1 for positive x and 0 for zero. */
int sign(int x) {
    return (x > 0) - (x < 0);
}
#define MAX_ROWS 100
#define MAX_COLS 100
#define MAX_DIGITS 100

/*
 * Connect the dots.
 *
 * Reads up to MAX_ROWS lines from stdin (fgets with a MAX_COLS buffer, so a
 * longer physical line is consumed as several rows). Every run of decimal
 * digits is a dot labelled with that number, located at (line index, column
 * of its first digit). Dots are joined in numeric order with '-', '|', '/'
 * or '\', each dot is drawn as '+', and the picture is written to stdout.
 *
 * Labels must be below MAX_DIGITS; larger ones are reported on stderr and
 * ignored, as are segments whose end points are not on a common row, column
 * or 45-degree diagonal. Returns 0.
 */
int main(void)
{
    /* number[v] = { row, column } of dot v, or { -1, -1 } if never seen. */
    int number[MAX_DIGITS][2];
    int rows = 0;
    int cols = 0;
    char row[MAX_COLS];
    int maxvalue = 0;
    int i, j, value, x;

    for (i = 0; i < MAX_DIGITS; i++)
        number[i][0] = number[i][1] = -1;

    // Read in the digits
    for (i = 0; i < MAX_ROWS; i++) {
        if (row != fgets(row, MAX_COLS, stdin))
            break;
        j = 0;
        while (row[j] != 0) {
            if (row[j] < '0' || row[j] > '9') {
                j++;
                continue;
            }
            x = j;
            value = 0;
            do {
                /* Stop accumulating once out of range; this keeps value
                   from overflowing on very long digit runs. */
                if (value < MAX_DIGITS)
                    value = 10*value + (row[j]-'0');
                j++;
            } while (row[j] >= '0' && row[j] <= '9');
            /* j now rests on the first non-digit, possibly the terminating
               NUL. It is re-tested by the loop condition rather than
               skipped, so the scan never runs past the end of the string
               into stale data from a previous, longer line. */
            if (value >= MAX_DIGITS) {
                fprintf(stderr, "line %d: dot label too large, ignored\n", i+1);
                continue;
            }
            number[value][0] = i;
            number[value][1] = x;
            if (maxvalue < value) maxvalue = value;
            if (rows < i+1) rows = i+1;
            if (cols < x+1) cols = x+1;
        }
    }

    /* No dots at all: nothing to draw, and a zero-sized array is not valid. */
    if (rows == 0)
        return 0;

    // Create an empty field
    char field[rows][cols];
    memset(field, ' ', rows*cols);
    /* Indexed by (dr+1)*3 + (dc+1); the middle entry (no movement) is '+'. */
    char lines[] = "\\|/-+-/|\\";
    int dr,dc;

    // Draw the numbers and lines
    if (number[1][0] >= 0)
        field[number[1][0]][number[1][1]] = '+';
    for (i = 2; i <= maxvalue; ++i) {
        int r = number[i-1][0];
        int c = number[i-1][1];
        int rt = number[i][0];
        int ct = number[i][1];
        if (rt < 0)
            continue;               /* dot i does not exist */
        field[rt][ct] = '+';
        if (r < 0)
            continue;               /* no predecessor to draw from */
        dr = sign(rt-r);
        dc = sign(ct-c);
        /* A diagonal that is not at 45 degrees would never hit the target
           and would walk out of the field. */
        if (dr != 0 && dc != 0 && abs(rt-r) != abs(ct-c)) {
            fprintf(stderr, "dots %d and %d are not aligned, segment skipped\n",
                    i-1, i);
            continue;
        }
        char line = lines[(dr+1)*3+dc+1];
        while (r != rt || c != ct) {
            r += dr;
            c += dc;
            field[r][c] = line;
        }
        field[r][c] = '+';
    }

    for (i = 0; i < rows; ++i) {
        for (j = 0; j < cols; ++j)
            putchar(field[i][j]);
        putchar('\n');
    }
    return 0;
}
C#, 638 chars
using System;
using System.Linq;
using System.Text.RegularExpressions;
// Connect-the-dots.
// Reads all of standard input, collects one {X, Y, N} record per number
// (N = the number, Y = zero-based line index, X = running total of the regex
// match lengths on that line), orders the records by N and draws into the
// flat char buffer k, which is laid out with W characters per row.
class C
{
static void Main()
{
int i=0,j;
var p = Console.In.ReadToEnd()
.Split('\n')
.SelectMany(
r =>
{
// i counts the lines handed to this lambda; j restarts for every line
i++; j =0;
// each match is the whitespace in front of a number plus the number
return Regex.Matches(r, "\\s+(\\d+)").Cast<Match>()
.Select(m => { j += m.Length; return new { X = j, Y = i-1, N = int.Parse(m.Groups[1].Value) }; });
}
).OrderBy(a=>a.N).ToList();
var W = p.Max(a => a.X)+1;
// sized from the number of lines counted in i
var k = new char[W*i+W];
i = 0;
while (i < p.Count)
{
// b = previous dot (the first dot is paired with itself), a = current dot
var b = p[i > 0 ? i - 1 : 0]; var a = p[i];
int h = a.Y - b.Y, w = a.X - b.X;
// '-' when the rows match, '|' when the columns match, otherwise '\' or '/'
var s = "|-/\\"[h == 0 ? 1 : w == 0 ? 0 : h / w > 0 ? 3 : 2];
// walk from a back towards b, shrinking h and w by one step each time
while ((h | w) != 0) { k[b.X + w + W * (b.Y + h)] = s; h -= h.CompareTo(0); w -= w.CompareTo(0); }
k[a.X + a.Y * W] = '+';
// NOTE(review): this index grows with the number of dots, while k was sized
// from the number of lines -- confirm it cannot exceed k.Length.
k[W * ++i] = '\n';
}
Console.Write(k);
}
}
I cannot do multi-line in a comment, so I will demonstrate here.
In the following examples, distance(x1,x2) == distance(y1,y2):
+
|\
+-+
+
|\
| \
+--+
+
|\
| \
| \
+---+
With the rules as explained, distance(x1,x2) == distance(y1,y2)+2:
+\
| \
+--\+
+\
| \
| \
+---\+
+\
| \
| \
| \
+----\+
C++ 637
#include <iostream>
#include <string>
#include <vector>
// Golfed connect-the-dots.
// S(x): sign of x (-1, 0 or 1).
// N[v] = {row, column} of dot v, M = largest label, q / p = rows / columns of
// the canvas F (a single string holding q rows of p characters).
// NOTE(review): sscanf returns the number of converted items, not the number
// of characters read, so "j+=L+1" advances two columns per number -- confirm
// labels never have more than two digits.
#define S(x)((x)<0?-1:x>0?1:0)
using namespace std;enum{R=100,C=100,D=100};int main(){string s;
int N[D][2],M=0,q=0,p=0,i,j,V,L,a,b;for(i=0;j=0,(i<R)&&getline(cin,s);i++)
while((j=s.find_first_not_of(" ",j))<=s.size()){L=sscanf(&s[j],"%d",&V);
N[V][0]=i;N[V][1]=j;if(M<V)M=V;if(q<=i)q=i+1;if(p<=j)p=j+1;j+=L+1;}
string F(q*p,' '),l="\\|/-+-/|\\";F[p*N[1][0]+N[1][1]]='+';for(i=2;i<=M;++i){
int r=N[i-1][0],c=N[i-1][1],d=N[i][0],e=N[i][1];for(a=S(d-r),b=S(e-c);r!=d||c!=e;)
r+=a,c+=b,F[p*r+c]=l[(a+1)*3+b+1];F[p*r+c]='+';}for(i=0;i<q;i++)
cout<<string(&F[i*p],p)+"\n";}
Indented, and with a few slightly more meaningful names, that looks like:
#include <iostream>
#include <string>
#include <vector>
// Sign of x: -1, 0 or 1. Every use of the argument is parenthesized.
#define S(x)((x)<0?-1:(x)>0?1:0)
using namespace std;
enum{R=100,C=100,D=100};
// Connect the dots.
//
// Reads up to R lines from stdin. Every run of decimal digits is a dot
// labelled with that number, located at (line index, column of its first
// digit). Dots are joined in numeric order with '-', '|', '/' or '\', each
// dot is drawn as '+', and the picture is written to stdout.
//
// Labels must be below D; larger ones are ignored. Segments whose end points
// are missing or not on a common row, column or 45-degree diagonal are
// skipped.
int main(){
  string s;
  // N[v] = {row, column} of dot v, or {-1, -1} if never seen.
  // M = largest label, rs / cs = rows / columns of the canvas.
  int N[D][2],M=0,rs=0,cs=0,i,j,V,dr,dc;
  for(i=0;i<D;i++)
    N[i][0]=N[i][1]=-1;
  for(i=0;(i<R)&&getline(cin,s);i++){
    int len=(int)s.size();
    for(j=0;j<len;){
      if(s[j]<'0'||s[j]>'9'){
        ++j;
        continue;
      }
      // Consume the whole digit run, so that labels of any length advance j
      // by exactly their own width.
      int start=j;
      for(V=0;j<len&&s[j]>='0'&&s[j]<='9';++j)
        if(V<D)V=10*V+(s[j]-'0'); // stop growing once out of range
      if(V>=D)continue;           // label does not fit in N
      N[V][0]=i;
      N[V][1]=start;
      if(M<V)M=V;
      if(rs<=i)rs=i+1;
      if(cs<=start)cs=start+1;
    }
  }
  if(rs==0)return 0;              // no dots: nothing to draw
  // lines is indexed by (dr+1)*3+(dc+1); the middle entry is '+'.
  string F(rs*cs,' '),lines="\\|/-+-/|\\";
  if(N[1][0]>=0)F[cs*N[1][0]+N[1][1]]='+';
  for(i=2;i<=M;++i){
    int r=N[i-1][0],c=N[i-1][1],rt=N[i][0],ct=N[i][1];
    if(rt<0)continue;             // dot i does not exist
    F[cs*rt+ct]='+';
    if(r<0)continue;              // no predecessor to draw from
    dr=S(rt-r);
    dc=S(ct-c);
    // (rt-r)*dr and (ct-c)*dc are the absolute distances; a diagonal with
    // unequal distances would never reach the target.
    if(dr&&dc&&(rt-r)*dr!=(ct-c)*dc)continue;
    for(;r!=rt||c!=ct;)
      r+=dr,c+=dc,F[cs*r+c]=lines[(dr+1)*3+dc+1];
    F[cs*r+c]='+';
  }
  for(i=0;i<rs;i++)
    cout<<string(&F[i*cs],cs)+"\n";
}
Despite superficial differences, it's a blatant theft of morotspaj's code.